LLVM  14.0.0
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/Constant.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CodeGen.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/KnownBits.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <functional>
73 #include <iterator>
74 #include <string>
75 #include <tuple>
76 #include <utility>
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "dagcombine"
81 
82 STATISTIC(NodesCombined , "Number of dag nodes combined");
83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87 STATISTIC(SlicedLoads, "Number of load sliced");
88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89 
90 static cl::opt<bool>
91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92  cl::desc("Enable DAG combiner's use of IR alias analysis"));
93 
94 static cl::opt<bool>
95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96  cl::desc("Enable DAG combiner's use of TBAA"));
97 
98 #ifndef NDEBUG
100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101  cl::desc("Only use DAG-combiner alias analysis in this"
102  " function"));
103 #endif
104 
105 /// Hidden option to stress test load slicing, i.e., when this option
106 /// is enabled, load slicing bypasses most of its profitability guards.
107 static cl::opt<bool>
108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109  cl::desc("Bypass the profitability model of load slicing"),
110  cl::init(false));
111 
112 static cl::opt<bool>
113  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114  cl::desc("DAG combiner may split indexing from loads"));
115 
116 static cl::opt<bool>
117  EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118  cl::desc("DAG combiner enable merging multiple stores "
119  "into a wider store"));
120 
122  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123  cl::desc("Limit the number of operands to inline for Token Factors"));
124 
126  "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127  cl::desc("Limit the number of times for the same StoreNode and RootNode "
128  "to bail out in store merging dependence check"));
129 
131  "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132  cl::desc("DAG combiner enable reducing the width of load/op/store "
133  "sequence"));
134 
136  "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137  cl::desc("DAG combiner enable load/<replace bytes>/store with "
138  "a narrower store"));
139 
140 namespace {
141 
142  class DAGCombiner {
143  SelectionDAG &DAG;
144  const TargetLowering &TLI;
145  const SelectionDAGTargetInfo *STI;
147  CodeGenOpt::Level OptLevel;
148  bool LegalDAG = false;
149  bool LegalOperations = false;
150  bool LegalTypes = false;
151  bool ForCodeSize;
152  bool DisableGenericCombines;
153 
154  /// Worklist of all of the nodes that need to be simplified.
155  ///
156  /// This must behave as a stack -- new nodes to process are pushed onto the
157  /// back and when processing we pop off of the back.
158  ///
159  /// The worklist will not contain duplicates but may contain null entries
160  /// due to nodes being deleted from the underlying DAG.
161  SmallVector<SDNode *, 64> Worklist;
162 
163  /// Mapping from an SDNode to its position on the worklist.
164  ///
165  /// This is used to find and remove nodes from the worklist (by nulling
166  /// them) when they are deleted from the underlying DAG. It relies on
167  /// stable indices of nodes within the worklist.
168  DenseMap<SDNode *, unsigned> WorklistMap;
169  /// This records all nodes attempted to add to the worklist since we
170  /// considered a new worklist entry. As we keep do not add duplicate nodes
171  /// in the worklist, this is different from the tail of the worklist.
172  SmallSetVector<SDNode *, 32> PruningList;
173 
174  /// Set of nodes which have been combined (at least once).
175  ///
176  /// This is used to allow us to reliably add any operands of a DAG node
177  /// which have not yet been combined to the worklist.
178  SmallPtrSet<SDNode *, 32> CombinedNodes;
179 
180  /// Map from candidate StoreNode to the pair of RootNode and count.
181  /// The count is used to track how many times we have seen the StoreNode
182  /// with the same RootNode bail out in dependence check. If we have seen
183  /// the bail out for the same pair many times over a limit, we won't
184  /// consider the StoreNode with the same RootNode as store merging
185  /// candidate again.
187 
188  // AA - Used for DAG load/store alias analysis.
189  AliasAnalysis *AA;
190 
191  /// When an instruction is simplified, add all users of the instruction to
192  /// the work lists because they might get more simplified now.
193  void AddUsersToWorklist(SDNode *N) {
194  for (SDNode *Node : N->uses())
195  AddToWorklist(Node);
196  }
197 
198  /// Convenient shorthand to add a node and all of its user to the worklist.
199  void AddToWorklistWithUsers(SDNode *N) {
200  AddUsersToWorklist(N);
201  AddToWorklist(N);
202  }
203 
204  // Prune potentially dangling nodes. This is called after
205  // any visit to a node, but should also be called during a visit after any
206  // failed combine which may have created a DAG node.
207  void clearAddedDanglingWorklistEntries() {
208  // Check any nodes added to the worklist to see if they are prunable.
209  while (!PruningList.empty()) {
210  auto *N = PruningList.pop_back_val();
211  if (N->use_empty())
212  recursivelyDeleteUnusedNodes(N);
213  }
214  }
215 
216  SDNode *getNextWorklistEntry() {
217  // Before we do any work, remove nodes that are not in use.
218  clearAddedDanglingWorklistEntries();
219  SDNode *N = nullptr;
220  // The Worklist holds the SDNodes in order, but it may contain null
221  // entries.
222  while (!N && !Worklist.empty()) {
223  N = Worklist.pop_back_val();
224  }
225 
226  if (N) {
227  bool GoodWorklistEntry = WorklistMap.erase(N);
228  (void)GoodWorklistEntry;
229  assert(GoodWorklistEntry &&
230  "Found a worklist entry without a corresponding map entry!");
231  }
232  return N;
233  }
234 
235  /// Call the node-specific routine that folds each particular type of node.
236  SDValue visit(SDNode *N);
237 
238  public:
239  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240  : DAG(D), TLI(D.getTargetLoweringInfo()),
241  STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
242  ForCodeSize = DAG.shouldOptForSize();
243  DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
244 
245  MaximumLegalStoreInBits = 0;
246  // We use the minimum store size here, since that's all we can guarantee
247  // for the scalable vector types.
248  for (MVT VT : MVT::all_valuetypes())
249  if (EVT(VT).isSimple() && VT != MVT::Other &&
250  TLI.isTypeLegal(EVT(VT)) &&
251  VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
252  MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
253  }
254 
255  void ConsiderForPruning(SDNode *N) {
256  // Mark this for potential pruning.
257  PruningList.insert(N);
258  }
259 
260  /// Add to the worklist making sure its instance is at the back (next to be
261  /// processed.)
262  void AddToWorklist(SDNode *N) {
263  assert(N->getOpcode() != ISD::DELETED_NODE &&
264  "Deleted Node added to Worklist");
265 
266  // Skip handle nodes as they can't usefully be combined and confuse the
267  // zero-use deletion strategy.
268  if (N->getOpcode() == ISD::HANDLENODE)
269  return;
270 
271  ConsiderForPruning(N);
272 
273  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
274  Worklist.push_back(N);
275  }
276 
277  /// Remove all instances of N from the worklist.
278  void removeFromWorklist(SDNode *N) {
279  CombinedNodes.erase(N);
280  PruningList.remove(N);
281  StoreRootCountMap.erase(N);
282 
283  auto It = WorklistMap.find(N);
284  if (It == WorklistMap.end())
285  return; // Not in the worklist.
286 
287  // Null out the entry rather than erasing it to avoid a linear operation.
288  Worklist[It->second] = nullptr;
289  WorklistMap.erase(It);
290  }
291 
292  void deleteAndRecombine(SDNode *N);
293  bool recursivelyDeleteUnusedNodes(SDNode *N);
294 
295  /// Replaces all uses of the results of one DAG node with new values.
296  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
297  bool AddTo = true);
298 
  /// Replaces all uses of \p N's single result with \p Res.
  /// Forwards to the array form of CombineTo with a one-element array.
  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    return CombineTo(N, &Res, 1, AddTo);
  }
303 
304  /// Replaces all uses of the results of one DAG node with new values.
305  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
306  bool AddTo = true) {
307  SDValue To[] = { Res0, Res1 };
308  return CombineTo(N, To, 2, AddTo);
309  }
310 
311  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
312 
313  private:
314  unsigned MaximumLegalStoreInBits;
315 
316  /// Check the specified integer node value to see if it can be simplified or
317  /// if things it uses can be simplified by bit propagation.
318  /// If so, return true.
319  bool SimplifyDemandedBits(SDValue Op) {
320  unsigned BitWidth = Op.getScalarValueSizeInBits();
322  return SimplifyDemandedBits(Op, DemandedBits);
323  }
324 
325  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
326  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
327  KnownBits Known;
328  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
329  return false;
330 
331  // Revisit the node.
332  AddToWorklist(Op.getNode());
333 
334  CommitTargetLoweringOpt(TLO);
335  return true;
336  }
337 
338  /// Check the specified vector node value to see if it can be simplified or
339  /// if things it uses can be simplified as it only uses some of the
340  /// elements. If so, return true.
341  bool SimplifyDemandedVectorElts(SDValue Op) {
342  // TODO: For now just pretend it cannot be simplified.
343  if (Op.getValueType().isScalableVector())
344  return false;
345 
346  unsigned NumElts = Op.getValueType().getVectorNumElements();
347  APInt DemandedElts = APInt::getAllOnes(NumElts);
348  return SimplifyDemandedVectorElts(Op, DemandedElts);
349  }
350 
351  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
352  const APInt &DemandedElts,
353  bool AssumeSingleUse = false);
354  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
355  bool AssumeSingleUse = false);
356 
357  bool CombineToPreIndexedLoadStore(SDNode *N);
358  bool CombineToPostIndexedLoadStore(SDNode *N);
359  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
360  bool SliceUpLoad(SDNode *N);
361 
362  // Scalars have size 0 to distinguish from singleton vectors.
363  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
364  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
365  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
366 
367  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
368  /// load.
369  ///
370  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
371  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
372  /// \param EltNo index of the vector element to load.
373  /// \param OriginalLoad load that EVE came from to be replaced.
374  /// \returns EVE on success SDValue() on failure.
375  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
376  SDValue EltNo,
377  LoadSDNode *OriginalLoad);
378  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
379  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
380  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
381  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
382  SDValue PromoteIntBinOp(SDValue Op);
383  SDValue PromoteIntShiftOp(SDValue Op);
384  SDValue PromoteExtend(SDValue Op);
385  bool PromoteLoad(SDValue Op);
386 
387  /// Call the node-specific routine that knows how to fold each
388  /// particular type of node. If that doesn't do anything, try the
389  /// target-specific DAG combines.
391 
392  // Visitation implementation - Implement dag node combining for different
393  // node types. The semantics are as follows:
394  // Return Value:
395  // SDValue.getNode() == 0 - No change was made
396  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
397  // otherwise - N should be replaced by the returned Operand.
398  //
399  SDValue visitTokenFactor(SDNode *N);
400  SDValue visitMERGE_VALUES(SDNode *N);
401  SDValue visitADD(SDNode *N);
402  SDValue visitADDLike(SDNode *N);
403  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
404  SDValue visitSUB(SDNode *N);
405  SDValue visitADDSAT(SDNode *N);
406  SDValue visitSUBSAT(SDNode *N);
407  SDValue visitADDC(SDNode *N);
408  SDValue visitADDO(SDNode *N);
409  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
410  SDValue visitSUBC(SDNode *N);
411  SDValue visitSUBO(SDNode *N);
412  SDValue visitADDE(SDNode *N);
413  SDValue visitADDCARRY(SDNode *N);
414  SDValue visitSADDO_CARRY(SDNode *N);
415  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
416  SDValue visitSUBE(SDNode *N);
417  SDValue visitSUBCARRY(SDNode *N);
418  SDValue visitSSUBO_CARRY(SDNode *N);
419  SDValue visitMUL(SDNode *N);
420  SDValue visitMULFIX(SDNode *N);
421  SDValue useDivRem(SDNode *N);
422  SDValue visitSDIV(SDNode *N);
423  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
424  SDValue visitUDIV(SDNode *N);
425  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
426  SDValue visitREM(SDNode *N);
427  SDValue visitMULHU(SDNode *N);
428  SDValue visitMULHS(SDNode *N);
429  SDValue visitSMUL_LOHI(SDNode *N);
430  SDValue visitUMUL_LOHI(SDNode *N);
431  SDValue visitMULO(SDNode *N);
432  SDValue visitIMINMAX(SDNode *N);
433  SDValue visitAND(SDNode *N);
434  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
435  SDValue visitOR(SDNode *N);
436  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
437  SDValue visitXOR(SDNode *N);
438  SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
439  SDValue visitSHL(SDNode *N);
440  SDValue visitSRA(SDNode *N);
441  SDValue visitSRL(SDNode *N);
442  SDValue visitFunnelShift(SDNode *N);
443  SDValue visitSHLSAT(SDNode *N);
444  SDValue visitRotate(SDNode *N);
445  SDValue visitABS(SDNode *N);
446  SDValue visitBSWAP(SDNode *N);
447  SDValue visitBITREVERSE(SDNode *N);
448  SDValue visitCTLZ(SDNode *N);
449  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450  SDValue visitCTTZ(SDNode *N);
451  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452  SDValue visitCTPOP(SDNode *N);
453  SDValue visitSELECT(SDNode *N);
454  SDValue visitVSELECT(SDNode *N);
455  SDValue visitSELECT_CC(SDNode *N);
456  SDValue visitSETCC(SDNode *N);
457  SDValue visitSETCCCARRY(SDNode *N);
458  SDValue visitSIGN_EXTEND(SDNode *N);
459  SDValue visitZERO_EXTEND(SDNode *N);
460  SDValue visitANY_EXTEND(SDNode *N);
461  SDValue visitAssertExt(SDNode *N);
462  SDValue visitAssertAlign(SDNode *N);
463  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464  SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
465  SDValue visitTRUNCATE(SDNode *N);
466  SDValue visitBITCAST(SDNode *N);
467  SDValue visitFREEZE(SDNode *N);
468  SDValue visitBUILD_PAIR(SDNode *N);
469  SDValue visitFADD(SDNode *N);
470  SDValue visitSTRICT_FADD(SDNode *N);
471  SDValue visitFSUB(SDNode *N);
472  SDValue visitFMUL(SDNode *N);
473  SDValue visitFMA(SDNode *N);
474  SDValue visitFDIV(SDNode *N);
475  SDValue visitFREM(SDNode *N);
476  SDValue visitFSQRT(SDNode *N);
477  SDValue visitFCOPYSIGN(SDNode *N);
478  SDValue visitFPOW(SDNode *N);
479  SDValue visitSINT_TO_FP(SDNode *N);
480  SDValue visitUINT_TO_FP(SDNode *N);
481  SDValue visitFP_TO_SINT(SDNode *N);
482  SDValue visitFP_TO_UINT(SDNode *N);
483  SDValue visitFP_ROUND(SDNode *N);
484  SDValue visitFP_EXTEND(SDNode *N);
485  SDValue visitFNEG(SDNode *N);
486  SDValue visitFABS(SDNode *N);
487  SDValue visitFCEIL(SDNode *N);
488  SDValue visitFTRUNC(SDNode *N);
489  SDValue visitFFLOOR(SDNode *N);
490  SDValue visitFMinMax(SDNode *N);
491  SDValue visitBRCOND(SDNode *N);
492  SDValue visitBR_CC(SDNode *N);
493  SDValue visitLOAD(SDNode *N);
494 
495  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
496  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
497 
498  SDValue visitSTORE(SDNode *N);
499  SDValue visitLIFETIME_END(SDNode *N);
500  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
501  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
502  SDValue visitBUILD_VECTOR(SDNode *N);
503  SDValue visitCONCAT_VECTORS(SDNode *N);
504  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
505  SDValue visitVECTOR_SHUFFLE(SDNode *N);
506  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
507  SDValue visitINSERT_SUBVECTOR(SDNode *N);
508  SDValue visitMLOAD(SDNode *N);
509  SDValue visitMSTORE(SDNode *N);
510  SDValue visitMGATHER(SDNode *N);
511  SDValue visitMSCATTER(SDNode *N);
512  SDValue visitFP_TO_FP16(SDNode *N);
513  SDValue visitFP16_TO_FP(SDNode *N);
514  SDValue visitVECREDUCE(SDNode *N);
515  SDValue visitVPOp(SDNode *N);
516 
517  SDValue visitFADDForFMACombine(SDNode *N);
518  SDValue visitFSUBForFMACombine(SDNode *N);
519  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
520 
521  SDValue XformToShuffleWithZero(SDNode *N);
522  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
523  const SDLoc &DL, SDValue N0,
524  SDValue N1);
525  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
526  SDValue N1);
527  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
528  SDValue N1, SDNodeFlags Flags);
529 
530  SDValue visitShiftByConstant(SDNode *N);
531 
532  SDValue foldSelectOfConstants(SDNode *N);
533  SDValue foldVSelectOfConstants(SDNode *N);
534  SDValue foldBinOpIntoSelect(SDNode *BO);
535  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
536  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
537  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
538  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
539  SDValue N2, SDValue N3, ISD::CondCode CC,
540  bool NotExtCompare = false);
541  SDValue convertSelectOfFPConstantsToLoadOffset(
542  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
543  ISD::CondCode CC);
544  SDValue foldSignChangeInBitcast(SDNode *N);
545  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
546  SDValue N2, SDValue N3, ISD::CondCode CC);
547  SDValue foldSelectOfBinops(SDNode *N);
548  SDValue foldSextSetcc(SDNode *N);
549  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
550  const SDLoc &DL);
551  SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
552  SDValue unfoldMaskedMerge(SDNode *N);
553  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
554  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
555  const SDLoc &DL, bool foldBooleans);
556  SDValue rebuildSetCC(SDValue N);
557 
558  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
559  SDValue &CC, bool MatchStrict = false) const;
560  bool isOneUseSetCC(SDValue N) const;
561 
562  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
563  unsigned HiOp);
564  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
565  SDValue CombineExtLoad(SDNode *N);
566  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
567  SDValue combineRepeatedFPDivisors(SDNode *N);
568  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
569  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
570  SDValue BuildSDIV(SDNode *N);
571  SDValue BuildSDIVPow2(SDNode *N);
572  SDValue BuildUDIV(SDNode *N);
573  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
574  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
575  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
576  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
577  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
578  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
579  SDNodeFlags Flags, bool Reciprocal);
580  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
581  SDNodeFlags Flags, bool Reciprocal);
582  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
583  bool DemandHighBits = true);
584  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
585  SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
586  SDValue InnerPos, SDValue InnerNeg,
587  unsigned PosOpcode, unsigned NegOpcode,
588  const SDLoc &DL);
589  SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
590  SDValue InnerPos, SDValue InnerNeg,
591  unsigned PosOpcode, unsigned NegOpcode,
592  const SDLoc &DL);
593  SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
594  SDValue MatchLoadCombine(SDNode *N);
595  SDValue mergeTruncStores(StoreSDNode *N);
596  SDValue reduceLoadWidth(SDNode *N);
597  SDValue ReduceLoadOpStoreWidth(SDNode *N);
599  SDValue TransformFPLoadStorePair(SDNode *N);
600  SDValue convertBuildVecZextToZext(SDNode *N);
601  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
602  SDValue reduceBuildVecTruncToBitCast(SDNode *N);
603  SDValue reduceBuildVecToShuffle(SDNode *N);
604  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
605  ArrayRef<int> VectorMask, SDValue VecIn1,
606  SDValue VecIn2, unsigned LeftIdx,
607  bool DidSplitVec);
608  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
609 
610  /// Walk up chain skipping non-aliasing memory nodes,
611  /// looking for aliasing nodes and adding them to the Aliases vector.
612  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
613  SmallVectorImpl<SDValue> &Aliases);
614 
615  /// Return true if there is any possibility that the two addresses overlap.
616  bool mayAlias(SDNode *Op0, SDNode *Op1) const;
617 
618  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
619  /// chain (aliasing node.)
620  SDValue FindBetterChain(SDNode *N, SDValue Chain);
621 
622  /// Try to replace a store and any possibly adjacent stores on
623  /// consecutive chains with better chains. Return true only if St is
624  /// replaced.
625  ///
626  /// Notice that other chains may still be replaced even if the function
627  /// returns false.
628  bool findBetterNeighborChains(StoreSDNode *St);
629 
630  // Helper for findBetterNeighborChains. Walk up store chain add additional
631  // chained stores that do not overlap and can be parallelized.
632  bool parallelizeChainedStores(StoreSDNode *St);
633 
  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    // The load/store node itself.
    LSBaseSDNode *MemNode;

    // Offset of this operation from the common base pointer.
    int64_t OffsetFromBase;

    MemOpLink(LSBaseSDNode *N, int64_t Offset)
        : MemNode(N), OffsetFromBase(Offset) {}
  };
646 
647  // Classify the origin of a stored value.
648  enum class StoreSource { Unknown, Constant, Extract, Load };
649  StoreSource getStoreSource(SDValue StoreVal) {
650  switch (StoreVal.getOpcode()) {
651  case ISD::Constant:
652  case ISD::ConstantFP:
653  return StoreSource::Constant;
656  return StoreSource::Extract;
657  case ISD::LOAD:
658  return StoreSource::Load;
659  default:
660  return StoreSource::Unknown;
661  }
662  }
663 
664  /// This is a helper function for visitMUL to check the profitability
665  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
666  /// MulNode is the original multiply, AddNode is (add x, c1),
667  /// and ConstNode is c2.
668  bool isMulAddWithConstProfitable(SDNode *MulNode,
669  SDValue &AddNode,
670  SDValue &ConstNode);
671 
672  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
673  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
674  /// the type of the loaded value to be extended.
675  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
676  EVT LoadResultTy, EVT &ExtVT);
677 
678  /// Helper function to calculate whether the given Load/Store can have its
679  /// width reduced to ExtVT.
680  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
681  EVT &MemVT, unsigned ShAmt = 0);
682 
683  /// Used by BackwardsPropagateMask to find suitable loads.
684  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
685  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
686  ConstantSDNode *Mask, SDNode *&NodeToMask);
687  /// Attempt to propagate a given AND node back to load leaves so that they
688  /// can be combined into narrow loads.
689  bool BackwardsPropagateMask(SDNode *N);
690 
691  /// Helper function for mergeConsecutiveStores which merges the component
692  /// store chains.
693  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
694  unsigned NumStores);
695 
696  /// This is a helper function for mergeConsecutiveStores. When the source
697  /// elements of the consecutive stores are all constants or all extracted
698  /// vector elements, try to merge them into one larger store introducing
699  /// bitcasts if necessary. \return True if a merged store was created.
700  bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
701  EVT MemVT, unsigned NumStores,
702  bool IsConstantSrc, bool UseVector,
703  bool UseTrunc);
704 
705  /// This is a helper function for mergeConsecutiveStores. Stores that
706  /// potentially may be merged with St are placed in StoreNodes. RootNode is
707  /// a chain predecessor to all store candidates.
708  void getStoreMergeCandidates(StoreSDNode *St,
709  SmallVectorImpl<MemOpLink> &StoreNodes,
710  SDNode *&Root);
711 
712  /// Helper function for mergeConsecutiveStores. Checks if candidate stores
713  /// have indirect dependency through their operands. RootNode is the
714  /// predecessor to all stores calculated by getStoreMergeCandidates and is
715  /// used to prune the dependency check. \return True if safe to merge.
716  bool checkMergeStoreCandidatesForDependencies(
717  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
718  SDNode *RootNode);
719 
720  /// This is a helper function for mergeConsecutiveStores. Given a list of
721  /// store candidates, find the first N that are consecutive in memory.
722  /// Returns 0 if there are not at least 2 consecutive stores to try merging.
723  unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
724  int64_t ElementSizeBytes) const;
725 
726  /// This is a helper function for mergeConsecutiveStores. It is used for
727  /// store chains that are composed entirely of constant values.
728  bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
729  unsigned NumConsecutiveStores,
730  EVT MemVT, SDNode *Root, bool AllowVectors);
731 
732  /// This is a helper function for mergeConsecutiveStores. It is used for
733  /// store chains that are composed entirely of extracted vector elements.
734  /// When extracting multiple vector elements, try to store them in one
735  /// vector store rather than a sequence of scalar stores.
736  bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
737  unsigned NumConsecutiveStores, EVT MemVT,
738  SDNode *Root);
739 
740  /// This is a helper function for mergeConsecutiveStores. It is used for
741  /// store chains that are composed entirely of loaded values.
742  bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
743  unsigned NumConsecutiveStores, EVT MemVT,
744  SDNode *Root, bool AllowVectors,
745  bool IsNonTemporalStore, bool IsNonTemporalLoad);
746 
747  /// Merge consecutive store operations into a wide store.
748  /// This optimization uses wide integers or vectors when possible.
749  /// \return true if stores were merged.
750  bool mergeConsecutiveStores(StoreSDNode *St);
751 
752  /// Try to transform a truncation where C is a constant:
753  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
754  ///
755  /// \p N needs to be a truncation and its first operand an AND. Other
756  /// requirements are checked by the function (e.g. that trunc is
757  /// single-use) and if missed an empty SDValue is returned.
758  SDValue distributeTruncateThroughAnd(SDNode *N);
759 
  /// Helper function to determine whether the target supports operation
  /// given by \p Opcode for type \p VT: legal-or-custom before operation
  /// legalization, and strictly legal (not custom) afterwards.
  /// Delegates to TargetLowering, passing the current LegalOperations state.
  bool hasOperation(unsigned Opcode, EVT VT) {
    return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
  }
767 
768  public:
769  /// Runs the dag combiner on all nodes in the work list
770  void Run(CombineLevel AtLevel);
771 
772  SelectionDAG &getDAG() const { return DAG; }
773 
  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  /// Delegates to TargetLowering with the DAG's data layout; LegalTypes tells
  /// the target whether the answer must already be a legal type.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  }
780 
781  /// This method returns true if we are running before type legalization or
782  /// if the specified VT is legal.
783  bool isTypeLegal(const EVT &VT) {
784  if (!LegalTypes) return true;
785  return TLI.isTypeLegal(VT);
786  }
787 
  /// Convenience wrapper around TargetLowering::getSetCCResultType, supplying
  /// the DAG's data layout and LLVM context.
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }
792 
793  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
794  SDValue OrigLoad, SDValue ExtLoad,
795  ISD::NodeType ExtType);
796  };
797 
798 /// This class is a DAGUpdateListener that removes any deleted
799 /// nodes from the worklist.
800 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
801  DAGCombiner &DC;
802 
803 public:
804  explicit WorklistRemover(DAGCombiner &dc)
805  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
806 
807  void NodeDeleted(SDNode *N, SDNode *E) override {
808  DC.removeFromWorklist(N);
809  }
810 };
811 
812 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
813  DAGCombiner &DC;
814 
815 public:
816  explicit WorklistInserter(DAGCombiner &dc)
817  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
818 
819  // FIXME: Ideally we could add N to the worklist, but this causes exponential
820  // compile time costs in large DAGs, e.g. Halide.
821  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
822 };
823 
824 } // end anonymous namespace
825 
826 //===----------------------------------------------------------------------===//
827 // TargetLowering::DAGCombinerInfo implementation
828 //===----------------------------------------------------------------------===//
829 
831  ((DAGCombiner*)DC)->AddToWorklist(N);
832 }
833 
835 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
836  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
837 }
838 
840 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
841  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
842 }
843 
845 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
846  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
847 }
848 
851  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
852 }
853 
856  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
857 }
858 
859 //===----------------------------------------------------------------------===//
860 // Helper Functions
861 //===----------------------------------------------------------------------===//
862 
863 void DAGCombiner::deleteAndRecombine(SDNode *N) {
865 
866  // If the operands of this node are only used by the node, they will now be
867  // dead. Make sure to re-visit them and recursively delete dead nodes.
868  for (const SDValue &Op : N->ops())
869  // For an operand generating multiple values, one of the values may
870  // become dead allowing further simplification (e.g. split index
871  // arithmetic from an indexed load).
872  if (Op->hasOneUse() || Op->getNumValues() > 1)
873  AddToWorklist(Op.getNode());
874 
875  DAG.DeleteNode(N);
876 }
877 
878 // APInts must be the same size for most operations, this helper
879 // function zero extends the shorter of the pair so that they match.
880 // We provide an Offset so that we can create bitwidths that won't overflow.
881 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
882  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
883  LHS = LHS.zextOrSelf(Bits);
884  RHS = RHS.zextOrSelf(Bits);
885 }
886 
887 // Return true if this node is a setcc, or is a select_cc
888 // that selects between the target values used for true and false, making it
889 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
890 // the appropriate nodes based on the type of node we are checking. This
891 // simplifies life a bit for the callers.
892 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
893  SDValue &CC, bool MatchStrict) const {
894  if (N.getOpcode() == ISD::SETCC) {
895  LHS = N.getOperand(0);
896  RHS = N.getOperand(1);
897  CC = N.getOperand(2);
898  return true;
899  }
900 
901  if (MatchStrict &&
902  (N.getOpcode() == ISD::STRICT_FSETCC ||
903  N.getOpcode() == ISD::STRICT_FSETCCS)) {
904  LHS = N.getOperand(1);
905  RHS = N.getOperand(2);
906  CC = N.getOperand(3);
907  return true;
908  }
909 
910  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
911  !TLI.isConstFalseVal(N.getOperand(3)))
912  return false;
913 
914  if (TLI.getBooleanContents(N.getValueType()) ==
916  return false;
917 
918  LHS = N.getOperand(0);
919  RHS = N.getOperand(1);
920  CC = N.getOperand(4);
921  return true;
922 }
923 
924 /// Return true if this is a SetCC-equivalent operation with only one use.
925 /// If this is true, it allows the users to invert the operation for free when
926 /// it is profitable to do so.
927 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
928  SDValue N0, N1, N2;
929  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
930  return true;
931  return false;
932 }
933 
934 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
935  if (!ScalarTy.isSimple())
936  return false;
937 
938  uint64_t MaskForTy = 0ULL;
939  switch (ScalarTy.getSimpleVT().SimpleTy) {
940  case MVT::i8:
941  MaskForTy = 0xFFULL;
942  break;
943  case MVT::i16:
944  MaskForTy = 0xFFFFULL;
945  break;
946  case MVT::i32:
947  MaskForTy = 0xFFFFFFFFULL;
948  break;
949  default:
950  return false;
951  break;
952  }
953 
954  APInt Val;
955  if (ISD::isConstantSplatVector(N, Val))
956  return Val.getLimitedValue() == MaskForTy;
957 
958  return false;
959 }
960 
961 // Determines if it is a constant integer or a splat/build vector of constant
962 // integers (and undefs).
963 // Do not permit build vector implicit truncation.
964 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
965  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
966  return !(Const->isOpaque() && NoOpaques);
967  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
968  return false;
969  unsigned BitWidth = N.getScalarValueSizeInBits();
970  for (const SDValue &Op : N->op_values()) {
971  if (Op.isUndef())
972  continue;
973  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
974  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
975  (Const->isOpaque() && NoOpaques))
976  return false;
977  }
978  return true;
979 }
980 
981 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
982 // undef's.
983 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
984  if (V.getOpcode() != ISD::BUILD_VECTOR)
985  return false;
986  return isConstantOrConstantVector(V, NoOpaques) ||
988 }
989 
990 // Determine if this an indexed load with an opaque target constant index.
991 static bool canSplitIdx(LoadSDNode *LD) {
992  return MaySplitLoadIndex &&
993  (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
994  !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
995 }
996 
997 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
998  const SDLoc &DL,
999  SDValue N0,
1000  SDValue N1) {
1001  // Currently this only tries to ensure we don't undo the GEP splits done by
1002  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1003  // we check if the following transformation would be problematic:
1004  // (load/store (add, (add, x, offset1), offset2)) ->
1005  // (load/store (add, x, offset1+offset2)).
1006 
1007  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1008  return false;
1009 
1010  if (N0.hasOneUse())
1011  return false;
1012 
1013  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1014  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1015  if (!C1 || !C2)
1016  return false;
1017 
1018  const APInt &C1APIntVal = C1->getAPIntValue();
1019  const APInt &C2APIntVal = C2->getAPIntValue();
1020  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1021  return false;
1022 
1023  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1024  if (CombinedValueIntVal.getBitWidth() > 64)
1025  return false;
1026  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1027 
1028  for (SDNode *Node : N0->uses()) {
1029  auto LoadStore = dyn_cast<MemSDNode>(Node);
1030  if (LoadStore) {
1031  // Is x[offset2] already not a legal addressing mode? If so then
1032  // reassociating the constants breaks nothing (we test offset2 because
1033  // that's the one we hope to fold into the load or store).
1035  AM.HasBaseReg = true;
1036  AM.BaseOffs = C2APIntVal.getSExtValue();
1037  EVT VT = LoadStore->getMemoryVT();
1038  unsigned AS = LoadStore->getAddressSpace();
1039  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1040  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1041  continue;
1042 
1043  // Would x[offset1+offset2] still be a legal addressing mode?
1044  AM.BaseOffs = CombinedValue;
1045  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1046  return true;
1047  }
1048  }
1049 
1050  return false;
1051 }
1052 
1053 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1054 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1055 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1056  SDValue N0, SDValue N1) {
1057  EVT VT = N0.getValueType();
1058 
1059  if (N0.getOpcode() != Opc)
1060  return SDValue();
1061 
1062  SDValue N00 = N0.getOperand(0);
1063  SDValue N01 = N0.getOperand(1);
1064 
1067  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1069  return DAG.getNode(Opc, DL, VT, N00, OpNode);
1070  return SDValue();
1071  }
1072  if (TLI.isReassocProfitable(DAG, N0, N1)) {
1073  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1074  // iff (op x, c1) has one use
1075  if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
1076  return DAG.getNode(Opc, DL, VT, OpNode, N01);
1077  return SDValue();
1078  }
1079  }
1080  return SDValue();
1081 }
1082 
1083 // Try to reassociate commutative binops.
1084 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1085  SDValue N1, SDNodeFlags Flags) {
1086  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1087 
1088  // Floating-point reassociation is not allowed without loose FP math.
1089  if (N0.getValueType().isFloatingPoint() ||
1091  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1092  return SDValue();
1093 
1094  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1095  return Combined;
1096  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1097  return Combined;
1098  return SDValue();
1099 }
1100 
/// Replace all of \p N's result values with the \p NumTo values in \p To,
/// optionally (\p AddTo) pushing the replacements and their users back onto
/// the worklist, and deleting \p N if the replacement left it dead.
/// Returns the first replacement value for caller convenience.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // Nodes deleted as a side effect of the RAUW must also leave the worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1132 
/// Commit a replacement queued by TargetLowering (TLO.Old -> TLO.New):
/// perform the RAUW, reschedule the new value (and its users), and delete
/// the old node if the replacement left it dead.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklistWithUsers(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1155 
1156 /// Check the specified integer node value to see if it can be simplified or if
1157 /// things it uses can be simplified by bit propagation. If so, return true.
1158 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1159  const APInt &DemandedElts,
1160  bool AssumeSingleUse) {
1161  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1162  KnownBits Known;
1163  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1164  AssumeSingleUse))
1165  return false;
1166 
1167  // Revisit the node.
1168  AddToWorklist(Op.getNode());
1169 
1170  CommitTargetLoweringOpt(TLO);
1171  return true;
1172 }
1173 
1174 /// Check the specified vector node value to see if it can be simplified or
1175 /// if things it uses can be simplified as it only uses some of the elements.
1176 /// If so, return true.
1177 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1178  const APInt &DemandedElts,
1179  bool AssumeSingleUse) {
1180  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1181  APInt KnownUndef, KnownZero;
1182  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1183  TLO, 0, AssumeSingleUse))
1184  return false;
1185 
1186  // Revisit the node.
1187  AddToWorklist(Op.getNode());
1188 
1189  CommitTargetLoweringOpt(TLO);
1190  return true;
1191 }
1192 
1193 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1194  SDLoc DL(Load);
1195  EVT VT = Load->getValueType(0);
1196  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1197 
1198  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1199  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1200  WorklistRemover DeadNodes(*this);
1201  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1202  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1203  deleteAndRecombine(Load);
1204  AddToWorklist(Trunc.getNode());
1205 }
1206 
1207 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1208  Replace = false;
1209  SDLoc DL(Op);
1210  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1211  LoadSDNode *LD = cast<LoadSDNode>(Op);
1212  EVT MemVT = LD->getMemoryVT();
1214  : LD->getExtensionType();
1215  Replace = true;
1216  return DAG.getExtLoad(ExtType, DL, PVT,
1217  LD->getChain(), LD->getBasePtr(),
1218  MemVT, LD->getMemOperand());
1219  }
1220 
1221  unsigned Opc = Op.getOpcode();
1222  switch (Opc) {
1223  default: break;
1224  case ISD::AssertSext:
1225  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1226  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1227  break;
1228  case ISD::AssertZext:
1229  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1230  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1231  break;
1232  case ISD::Constant: {
1233  unsigned ExtOpc =
1234  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1235  return DAG.getNode(ExtOpc, DL, PVT, Op);
1236  }
1237  }
1238 
1239  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1240  return SDValue();
1241  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1242 }
1243 
1244 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1246  return SDValue();
1247  EVT OldVT = Op.getValueType();
1248  SDLoc DL(Op);
1249  bool Replace = false;
1250  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1251  if (!NewOp.getNode())
1252  return SDValue();
1253  AddToWorklist(NewOp.getNode());
1254 
1255  if (Replace)
1256  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1257  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1258  DAG.getValueType(OldVT));
1259 }
1260 
1261 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1262  EVT OldVT = Op.getValueType();
1263  SDLoc DL(Op);
1264  bool Replace = false;
1265  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1266  if (!NewOp.getNode())
1267  return SDValue();
1268  AddToWorklist(NewOp.getNode());
1269 
1270  if (Replace)
1271  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1272  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1273 }
1274 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Widen both operands; the Replace flags record whether each promotion
    // created an extending load whose old users must be rewired below.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the op at the wide type, then truncate back to the original.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need additional
    // replacements if there are additional uses.
    // Note: We are checking uses of the *nodes* (SDNode) rather than values
    // (SDValue) here because the node may reference multiple values
    // (for example, the chain value of a load node).
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1342 
1343 /// Promote the specified integer shift operation if the target indicates it is
1344 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1345 /// i32 since i16 instructions are longer.
1346 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1347  if (!LegalOperations)
1348  return SDValue();
1349 
1350  EVT VT = Op.getValueType();
1351  if (VT.isVector() || !VT.isInteger())
1352  return SDValue();
1353 
1354  // If operation type is 'undesirable', e.g. i16 on x86, consider
1355  // promoting it.
1356  unsigned Opc = Op.getOpcode();
1357  if (TLI.isTypeDesirableForOp(Opc, VT))
1358  return SDValue();
1359 
1360  EVT PVT = VT;
1361  // Consult target whether it is a good idea to promote this operation and
1362  // what's the right type to promote it to.
1363  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1364  assert(PVT != VT && "Don't know what type to promote to!");
1365 
1366  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1367 
1368  bool Replace = false;
1369  SDValue N0 = Op.getOperand(0);
1370  SDValue N1 = Op.getOperand(1);
1371  if (Opc == ISD::SRA)
1372  N0 = SExtPromoteOperand(N0, PVT);
1373  else if (Opc == ISD::SRL)
1374  N0 = ZExtPromoteOperand(N0, PVT);
1375  else
1376  N0 = PromoteOperand(N0, PVT, Replace);
1377 
1378  if (!N0.getNode())
1379  return SDValue();
1380 
1381  SDLoc DL(Op);
1382  SDValue RV =
1383  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1384 
1385  if (Replace)
1386  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1387 
1388  // Deal with Op being deleted.
1389  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1390  return RV;
1391  }
1392  return SDValue();
1393 }
1394 
1395 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1396  if (!LegalOperations)
1397  return SDValue();
1398 
1399  EVT VT = Op.getValueType();
1400  if (VT.isVector() || !VT.isInteger())
1401  return SDValue();
1402 
1403  // If operation type is 'undesirable', e.g. i16 on x86, consider
1404  // promoting it.
1405  unsigned Opc = Op.getOpcode();
1406  if (TLI.isTypeDesirableForOp(Opc, VT))
1407  return SDValue();
1408 
1409  EVT PVT = VT;
1410  // Consult target whether it is a good idea to promote this operation and
1411  // what's the right type to promote it to.
1412  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1413  assert(PVT != VT && "Don't know what type to promote to!");
1414  // fold (aext (aext x)) -> (aext x)
1415  // fold (aext (zext x)) -> (zext x)
1416  // fold (aext (sext x)) -> (sext x)
1417  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1418  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1419  }
1420  return SDValue();
1421 }
1422 
1423 bool DAGCombiner::PromoteLoad(SDValue Op) {
1424  if (!LegalOperations)
1425  return false;
1426 
1427  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1428  return false;
1429 
1430  EVT VT = Op.getValueType();
1431  if (VT.isVector() || !VT.isInteger())
1432  return false;
1433 
1434  // If operation type is 'undesirable', e.g. i16 on x86, consider
1435  // promoting it.
1436  unsigned Opc = Op.getOpcode();
1437  if (TLI.isTypeDesirableForOp(Opc, VT))
1438  return false;
1439 
1440  EVT PVT = VT;
1441  // Consult target whether it is a good idea to promote this operation and
1442  // what's the right type to promote it to.
1443  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1444  assert(PVT != VT && "Don't know what type to promote to!");
1445 
1446  SDLoc DL(Op);
1447  SDNode *N = Op.getNode();
1448  LoadSDNode *LD = cast<LoadSDNode>(N);
1449  EVT MemVT = LD->getMemoryVT();
1451  : LD->getExtensionType();
1452  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1453  LD->getChain(), LD->getBasePtr(),
1454  MemVT, LD->getMemOperand());
1455  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1456 
1457  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1458  Result.getNode()->dump(&DAG); dbgs() << '\n');
1459  WorklistRemover DeadNodes(*this);
1460  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1461  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1462  deleteAndRecombine(N);
1463  AddToWorklist(Result.getNode());
1464  return true;
1465  }
1466  return false;
1467 }
1468 
1469 /// Recursively delete a node which has no uses and any operands for
1470 /// which it is the only use.
1471 ///
1472 /// Note that this both deletes the nodes and removes them from the worklist.
1473 /// It also adds any nodes who have had a user deleted to the worklist as they
1474 /// may now have only one use and subject to other combines.
1475 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1476  if (!N->use_empty())
1477  return false;
1478 
1480  Nodes.insert(N);
1481  do {
1482  N = Nodes.pop_back_val();
1483  if (!N)
1484  continue;
1485 
1486  if (N->use_empty()) {
1487  for (const SDValue &ChildN : N->op_values())
1488  Nodes.insert(ChildN.getNode());
1489 
1491  DAG.DeleteNode(N);
1492  } else {
1493  AddToWorklist(N);
1494  }
1495  } while (!Nodes.empty());
1496  return true;
1497 }
1498 
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

/// Top-level driver: seeds the worklist with every node in the DAG, then
/// repeatedly pops nodes and tries to combine each until the list is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // Legalization may have deleted or replaced N entirely.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // No combine fired for this node.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // The combine returned a single value for a single-result node.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist. Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken) {
      AddToWorklist(RV.getNode());
      AddUsersToWorklist(RV.getNode());
    }

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1603 
1604 SDValue DAGCombiner::visit(SDNode *N) {
1605  switch (N->getOpcode()) {
1606  default: break;
1607  case ISD::TokenFactor: return visitTokenFactor(N);
1608  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1609  case ISD::ADD: return visitADD(N);
1610  case ISD::SUB: return visitSUB(N);
1611  case ISD::SADDSAT:
1612  case ISD::UADDSAT: return visitADDSAT(N);
1613  case ISD::SSUBSAT:
1614  case ISD::USUBSAT: return visitSUBSAT(N);
1615  case ISD::ADDC: return visitADDC(N);
1616  case ISD::SADDO:
1617  case ISD::UADDO: return visitADDO(N);
1618  case ISD::SUBC: return visitSUBC(N);
1619  case ISD::SSUBO:
1620  case ISD::USUBO: return visitSUBO(N);
1621  case ISD::ADDE: return visitADDE(N);
1622  case ISD::ADDCARRY: return visitADDCARRY(N);
1623  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1624  case ISD::SUBE: return visitSUBE(N);
1625  case ISD::SUBCARRY: return visitSUBCARRY(N);
1626  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1627  case ISD::SMULFIX:
1628  case ISD::SMULFIXSAT:
1629  case ISD::UMULFIX:
1630  case ISD::UMULFIXSAT: return visitMULFIX(N);
1631  case ISD::MUL: return visitMUL(N);
1632  case ISD::SDIV: return visitSDIV(N);
1633  case ISD::UDIV: return visitUDIV(N);
1634  case ISD::SREM:
1635  case ISD::UREM: return visitREM(N);
1636  case ISD::MULHU: return visitMULHU(N);
1637  case ISD::MULHS: return visitMULHS(N);
1638  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1639  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1640  case ISD::SMULO:
1641  case ISD::UMULO: return visitMULO(N);
1642  case ISD::SMIN:
1643  case ISD::SMAX:
1644  case ISD::UMIN:
1645  case ISD::UMAX: return visitIMINMAX(N);
1646  case ISD::AND: return visitAND(N);
1647  case ISD::OR: return visitOR(N);
1648  case ISD::XOR: return visitXOR(N);
1649  case ISD::SHL: return visitSHL(N);
1650  case ISD::SRA: return visitSRA(N);
1651  case ISD::SRL: return visitSRL(N);
1652  case ISD::ROTR:
1653  case ISD::ROTL: return visitRotate(N);
1654  case ISD::FSHL:
1655  case ISD::FSHR: return visitFunnelShift(N);
1656  case ISD::SSHLSAT:
1657  case ISD::USHLSAT: return visitSHLSAT(N);
1658  case ISD::ABS: return visitABS(N);
1659  case ISD::BSWAP: return visitBSWAP(N);
1660  case ISD::BITREVERSE: return visitBITREVERSE(N);
1661  case ISD::CTLZ: return visitCTLZ(N);
1662  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1663  case ISD::CTTZ: return visitCTTZ(N);
1664  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1665  case ISD::CTPOP: return visitCTPOP(N);
1666  case ISD::SELECT: return visitSELECT(N);
1667  case ISD::VSELECT: return visitVSELECT(N);
1668  case ISD::SELECT_CC: return visitSELECT_CC(N);
1669  case ISD::SETCC: return visitSETCC(N);
1670  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1671  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1672  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1673  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1674  case ISD::AssertSext:
1675  case ISD::AssertZext: return visitAssertExt(N);
1676  case ISD::AssertAlign: return visitAssertAlign(N);
1677  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1679  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1680  case ISD::TRUNCATE: return visitTRUNCATE(N);
1681  case ISD::BITCAST: return visitBITCAST(N);
1682  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1683  case ISD::FADD: return visitFADD(N);
1684  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1685  case ISD::FSUB: return visitFSUB(N);
1686  case ISD::FMUL: return visitFMUL(N);
1687  case ISD::FMA: return visitFMA(N);
1688  case ISD::FDIV: return visitFDIV(N);
1689  case ISD::FREM: return visitFREM(N);
1690  case ISD::FSQRT: return visitFSQRT(N);
1691  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1692  case ISD::FPOW: return visitFPOW(N);
1693  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1694  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1695  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1696  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1697  case ISD::FP_ROUND: return visitFP_ROUND(N);
1698  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1699  case ISD::FNEG: return visitFNEG(N);
1700  case ISD::FABS: return visitFABS(N);
1701  case ISD::FFLOOR: return visitFFLOOR(N);
1702  case ISD::FMINNUM:
1703  case ISD::FMAXNUM:
1704  case ISD::FMINIMUM:
1705  case ISD::FMAXIMUM: return visitFMinMax(N);
1706  case ISD::FCEIL: return visitFCEIL(N);
1707  case ISD::FTRUNC: return visitFTRUNC(N);
1708  case ISD::BRCOND: return visitBRCOND(N);
1709  case ISD::BR_CC: return visitBR_CC(N);
1710  case ISD::LOAD: return visitLOAD(N);
1711  case ISD::STORE: return visitSTORE(N);
1712  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1713  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1714  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1715  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1716  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1717  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1718  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1719  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1720  case ISD::MGATHER: return visitMGATHER(N);
1721  case ISD::MLOAD: return visitMLOAD(N);
1722  case ISD::MSCATTER: return visitMSCATTER(N);
1723  case ISD::MSTORE: return visitMSTORE(N);
1724  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1725  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1726  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1727  case ISD::FREEZE: return visitFREEZE(N);
1728  case ISD::VECREDUCE_FADD:
1729  case ISD::VECREDUCE_FMUL:
1730  case ISD::VECREDUCE_ADD:
1731  case ISD::VECREDUCE_MUL:
1732  case ISD::VECREDUCE_AND:
1733  case ISD::VECREDUCE_OR:
1734  case ISD::VECREDUCE_XOR:
1735  case ISD::VECREDUCE_SMAX:
1736  case ISD::VECREDUCE_SMIN:
1737  case ISD::VECREDUCE_UMAX:
1738  case ISD::VECREDUCE_UMIN:
1739  case ISD::VECREDUCE_FMAX:
1740  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1741 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1742 #include "llvm/IR/VPIntrinsics.def"
1743  return visitVPOp(N);
1744  }
1745  return SDValue();
1746 }
1747 
// Body of the per-node combining driver (DAGCombiner::combine; the signature
// line is not visible in this extract). Strategy, in order:
//   1. run the generic combines via visit(N), unless disabled;
//   2. if that produced nothing, let the target try via PerformDAGCombine;
//   3. if still nothing, attempt integer promotion for selected opcodes;
//   4. finally, CSE the node against an existing commuted twin.
1749  SDValue RV;
1750  if (!DisableGenericCombines)
1751  RV = visit(N);
1752 
1753  // If nothing happened, try a target-specific DAG combine.
1754  if (!RV.getNode()) {
1755  assert(N->getOpcode() != ISD::DELETED_NODE &&
1756  "Node was deleted but visit returned NULL!");
1757 
1758  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1759  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1760 
1761  // Expose the DAG combiner to the target combiner impls.
// NOTE(review): the declaration line for DagCombineInfo (presumably a
// TargetLowering::DAGCombinerInfo) is missing from this extract.
1763  DagCombineInfo(DAG, Level, false, this);
1764 
1765  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1766  }
1767  }
1768 
1769  // If nothing happened still, try promoting the operation.
1770  if (!RV.getNode()) {
1771  switch (N->getOpcode()) {
1772  default: break;
1773  case ISD::ADD:
1774  case ISD::SUB:
1775  case ISD::MUL:
1776  case ISD::AND:
1777  case ISD::OR:
1778  case ISD::XOR:
1779  RV = PromoteIntBinOp(SDValue(N, 0));
1780  break;
1781  case ISD::SHL:
1782  case ISD::SRA:
1783  case ISD::SRL:
1784  RV = PromoteIntShiftOp(SDValue(N, 0));
1785  break;
1786  case ISD::SIGN_EXTEND:
1787  case ISD::ZERO_EXTEND:
1788  case ISD::ANY_EXTEND:
1789  RV = PromoteExtend(SDValue(N, 0));
1790  break;
1791  case ISD::LOAD:
// PromoteLoad mutates/replaces the load in place, so signal success by
// returning N itself rather than a new node.
1792  if (PromoteLoad(SDValue(N, 0)))
1793  RV = SDValue(N, 0);
1794  break;
1795  }
1796  }
1797 
1798  // If N is a commutative binary node, try to eliminate it if the commuted
1799  // version is already present in the DAG.
1800  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1801  N->getNumValues() == 1) {
1802  SDValue N0 = N->getOperand(0);
1803  SDValue N1 = N->getOperand(1);
1804 
1805  // Constant operands are canonicalized to RHS.
// Only look for the swapped form when the swap would not move a constant
// to the LHS (which would be a non-canonical node we'd never find).
1806  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1807  SDValue Ops[] = {N1, N0};
1808  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1809  N->getFlags());
1810  if (CSENode)
1811  return SDValue(CSENode, 0);
1812  }
1813  }
1814 
1815  return RV;
1816 }
1817 
1818 /// Given a node, return its input chain if it has one, otherwise return a null
1819 /// sd operand.
// Chain operands have value type MVT::Other. Check the two common positions
// (first and last operand) before scanning the remaining middle operands.
// NOTE(review): the function signature line is missing from this extract.
1821  if (unsigned NumOps = N->getNumOperands()) {
1822  if (N->getOperand(0).getValueType() == MVT::Other)
1823  return N->getOperand(0);
1824  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1825  return N->getOperand(NumOps-1);
1826  for (unsigned i = 1; i < NumOps-1; ++i)
1827  if (N->getOperand(i).getValueType() == MVT::Other)
1828  return N->getOperand(i);
1829  }
// No operands, or no chain-typed operand found.
1830  return SDValue();
1831 }
1832 
// Simplify a TokenFactor: strip redundant chains, inline nested TokenFactors
// (up to TokenFactorInlineLimit), deduplicate operands, and prune operands
// whose chains are reachable from another operand.
1833 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1834  // If N has two operands, where one has an input chain equal to the other,
1835  // the 'other' chain is redundant.
1836  if (N->getNumOperands() == 2) {
1837  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1838  return N->getOperand(0);
1839  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1840  return N->getOperand(1);
1841  }
1842 
1843  // Don't simplify token factors if optnone.
1844  if (OptLevel == CodeGenOpt::None)
1845  return SDValue();
1846 
1847  // Don't simplify the token factor if the node itself has too many operands.
1848  if (N->getNumOperands() > TokenFactorInlineLimit)
1849  return SDValue();
1850 
1851  // If the sole user is a token factor, we should make sure we have a
1852  // chance to merge them together. This prevents TF chains from inhibiting
1853  // optimizations.
1854  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1855  AddToWorklist(*(N->use_begin()));
1856 
1857  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1858  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1859  SmallPtrSet<SDNode*, 16> SeenOps;
1860  bool Changed = false; // If we should replace this token factor.
1861 
1862  // Start out with this token factor.
1863  TFs.push_back(N);
1864 
1865  // Iterate through token factors. The TFs grows when new token factors are
1866  // encountered.
1867  for (unsigned i = 0; i < TFs.size(); ++i) {
1868  // Limit number of nodes to inline, to avoid quadratic compile times.
1869  // We have to add the outstanding Token Factors to Ops, otherwise we might
1870  // drop Ops from the resulting Token Factors.
1871  if (Ops.size() > TokenFactorInlineLimit) {
1872  for (unsigned j = i; j < TFs.size(); j++)
1873  Ops.emplace_back(TFs[j], 0);
1874  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1875  // combiner worklist later.
1876  TFs.resize(i);
1877  break;
1878  }
1879 
1880  SDNode *TF = TFs[i];
1881  // Check each of the operands.
1882  for (const SDValue &Op : TF->op_values()) {
1883  switch (Op.getOpcode()) {
1884  case ISD::EntryToken:
1885  // Entry tokens don't need to be added to the list. They are
1886  // redundant.
1887  Changed = true;
1888  break;
1889 
1890  case ISD::TokenFactor:
// Only inline a nested TokenFactor when this is its sole use and it has
// not already been queued.
1891  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1892  // Queue up for processing.
1893  TFs.push_back(Op.getNode());
1894  Changed = true;
1895  break;
1896  }
1897 
// NOTE(review): a fall-through annotation (LLVM_FALLTHROUGH) appears to be
// missing from this extract — when the one-use test above fails, control
// intentionally falls through to the default case below.
1899  default:
1900  // Only add if it isn't already in the list.
1901  if (SeenOps.insert(Op.getNode()).second)
1902  Ops.push_back(Op);
1903  else
1904  Changed = true;
1905  break;
1906  }
1907  }
1908  }
1909 
1910  // Re-visit inlined Token Factors, to clean them up in case they have been
1911  // removed. Skip the first Token Factor, as this is the current node.
1912  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1913  AddToWorklist(TFs[i]);
1914 
1915  // Remove Nodes that are chained to another node in the list. Do so
1916  // by walking up chains breadth-first stopping when we've seen
1917  // another operand. In general we must climb to the EntryNode, but we can exit
1918  // early if we find all remaining work is associated with just one operand as
1919  // no further pruning is possible.
1920 
1921  // List of nodes to search through and original Ops from which they originate.
// NOTE(review): the declaration of the local 'Worklist' (a vector of
// (SDNode*, op-index) pairs, judging by its uses below) is missing from
// this extract.
1923  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1924  SmallPtrSet<SDNode *, 16> SeenChains;
1925  bool DidPruneOps = false;
1926 
1927  unsigned NumLeftToConsider = 0;
1928  for (const SDValue &Op : Ops) {
1929  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1930  OpWorkCount.push_back(1);
1931  }
1932 
// Local helper (shadows the member AddToWorklist): record that chain node
// Op was reached while searching on behalf of operand OpNumber.
1933  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1934  // If this is an Op, we can remove the op from the list. Remark any
1935  // search associated with it as from the current OpNumber.
1936  if (SeenOps.contains(Op)) {
1937  Changed = true;
1938  DidPruneOps = true;
1939  unsigned OrigOpNumber = 0;
1940  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1941  OrigOpNumber++;
1942  assert((OrigOpNumber != Ops.size()) &&
1943  "expected to find TokenFactor Operand");
1944  // Re-mark worklist from OrigOpNumber to OpNumber
1945  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1946  if (Worklist[i].second == OrigOpNumber) {
1947  Worklist[i].second = OpNumber;
1948  }
1949  }
1950  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1951  OpWorkCount[OrigOpNumber] = 0;
1952  NumLeftToConsider--;
1953  }
1954  // Add if it's a new chain
1955  if (SeenChains.insert(Op).second) {
1956  OpWorkCount[OpNumber]++;
1957  Worklist.push_back(std::make_pair(Op, OpNumber));
1958  }
1959  };
1960 
// Breadth-first walk up the chains, hard-capped at 1024 steps to bound
// compile time.
1961  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1962  // We need to consider at least 2 Ops to prune.
1963  if (NumLeftToConsider <= 1)
1964  break;
1965  auto CurNode = Worklist[i].first;
1966  auto CurOpNumber = Worklist[i].second;
1967  assert((OpWorkCount[CurOpNumber] > 0) &&
1968  "Node should not appear in worklist");
1969  switch (CurNode->getOpcode()) {
1970  case ISD::EntryToken:
1971  // Hitting EntryToken is the only way for the search to terminate without
1972  // hitting
1973  // another operand's search. Prevent us from marking this operand
1974  // considered.
1975  NumLeftToConsider++;
1976  break;
1977  case ISD::TokenFactor:
1978  for (const SDValue &Op : CurNode->op_values())
1979  AddToWorklist(i, Op.getNode(), CurOpNumber);
1980  break;
1981  case ISD::LIFETIME_START:
1982  case ISD::LIFETIME_END:
1983  case ISD::CopyFromReg:
1984  case ISD::CopyToReg:
// These all carry their chain as operand 0.
1985  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1986  break;
1987  default:
1988  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1989  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1990  break;
1991  }
1992  OpWorkCount[CurOpNumber]--;
1993  if (OpWorkCount[CurOpNumber] == 0)
1994  NumLeftToConsider--;
1995  }
1996 
1997  // If we've changed things around then replace token factor.
1998  if (Changed) {
1999  SDValue Result;
2000  if (Ops.empty()) {
2001  // The entry token is the only possible outcome.
2002  Result = DAG.getEntryNode();
2003  } else {
2004  if (DidPruneOps) {
2005  SmallVector<SDValue, 8> PrunedOps;
2006  // Keep only ops whose chains were not reached from another op.
2007  for (const SDValue &Op : Ops) {
2008  if (SeenChains.count(Op.getNode()) == 0)
2009  PrunedOps.push_back(Op);
2010  }
2011  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2012  } else {
2013  Result = DAG.getTokenFactor(SDLoc(N), Ops);
2014  }
2015  }
2016  return Result;
2017  }
2018  return SDValue();
2019 }
2020 
2021 /// MERGE_VALUES can always be eliminated.
2022 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2023  WorklistRemover DeadNodes(*this);
2024  // Replacing results may cause a different MERGE_VALUES to suddenly
2025  // be CSE'd with N, and carry its uses with it. Iterate until no
2026  // uses remain, to ensure that the node can be safely deleted.
2027  // First add the users of this node to the work list so that they
2028  // can be tried again once they have new operands.
2029  AddUsersToWorklist(N);
2030  do {
2031  // Do as a single replacement to avoid rewalking use lists.
// NOTE(review): the declaration of the local 'Ops' (a small vector of
// SDValue, judging by its uses below) is missing from this extract.
2033  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2034  Ops.push_back(N->getOperand(i));
// Replace result value i of N with its i-th operand, for all results.
2035  DAG.ReplaceAllUsesWith(N, Ops.data());
2036  } while (!N->use_empty());
2037  deleteAndRecombine(N);
2038  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2039 }
2040 
2041 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2042 /// ConstantSDNode pointer else nullptr.
// NOTE(review): the function signature line is missing from this extract.
2044  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2045  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2046 }
2047 
2048 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2049 /// and that N may be folded in the load / store addressing mode.
// NOTE(review): the function signature's first line is missing from this
// extract; only its continuation (the TLI parameter) is visible below.
2051  const TargetLowering &TLI) {
2052  EVT VT;
2053  unsigned AS;
2054 
// Accept plain and masked loads/stores; in each case require a non-indexed
// access whose base pointer is exactly N, and record the memory VT and
// address space for the legality query below.
2055  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2056  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2057  return false;
2058  VT = LD->getMemoryVT();
2059  AS = LD->getAddressSpace();
2060  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2061  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2062  return false;
2063  VT = ST->getMemoryVT();
2064  AS = ST->getAddressSpace();
2065  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2066  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2067  return false;
2068  VT = LD->getMemoryVT();
2069  AS = LD->getAddressSpace();
2070  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2071  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2072  return false;
2073  VT = ST->getMemoryVT();
2074  AS = ST->getAddressSpace();
2075  } else
2076  return false;
2077 
// Translate N (an ADD or SUB of the base) into an addressing-mode query.
// NOTE(review): the declaration of 'AM' (presumably a
// TargetLowering::AddrMode) is missing from this extract.
2079  if (N->getOpcode() == ISD::ADD) {
2080  AM.HasBaseReg = true;
2081  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2082  if (Offset)
2083  // [reg +/- imm]
2084  AM.BaseOffs = Offset->getSExtValue();
2085  else
2086  // [reg +/- reg]
2087  AM.Scale = 1;
2088  } else if (N->getOpcode() == ISD::SUB) {
2089  AM.HasBaseReg = true;
2090  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2091  if (Offset)
2092  // [reg +/- imm]
2093  AM.BaseOffs = -Offset->getSExtValue();
2094  else
2095  // [reg +/- reg]
2096  AM.Scale = 1;
2097  } else
2098  return false;
2099 
2100  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2101  VT.getTypeForEVT(*DAG.getContext()), AS);
2102 }
2103 
2104 /// This inverts a canonicalization in IR that replaces a variable select arm
2105 /// with an identity constant. Codegen improves if we re-use the variable
2106 /// operand rather than load a constant. This can also be converted into a
2107 /// masked vector operation if the target supports it.
// NOTE(review): the function signature's first line is missing from this
// extract; only its continuation (ShouldCommuteOperands) is visible below.
2109  bool ShouldCommuteOperands) {
2110  // Match a select as operand 1. The identity constant that we are looking for
2111  // is only valid as operand 1 of a non-commutative binop.
2112  SDValue N0 = N->getOperand(0);
2113  SDValue N1 = N->getOperand(1);
2114  if (ShouldCommuteOperands)
2115  std::swap(N0, N1);
2116 
2117  // TODO: Should this apply to scalar select too?
2118  if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
2119  return SDValue();
2120 
2121  unsigned Opcode = N->getOpcode();
2122  EVT VT = N->getValueType(0);
2123  SDValue Cond = N1.getOperand(0);
2124  SDValue TVal = N1.getOperand(1);
2125  SDValue FVal = N1.getOperand(2);
2126 
2127  // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
2128  // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
2129  // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
2130  auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
// NOTE(review): the line extracting the constant-FP 'C' from V (guarding
// this switch) is missing from this extract.
2132  switch (Opcode) {
2133  case ISD::FADD: // X + -0.0 --> X
2134  return C->isZero() && C->isNegative();
2135  case ISD::FSUB: // X - 0.0 --> X
2136  return C->isZero() && !C->isNegative();
2137  case ISD::FMUL: // X * 1.0 --> X
2138  case ISD::FDIV: // X / 1.0 --> X
2139  return C->isExactlyValue(1.0);
2140  }
2141  }
2142  return false;
2143  };
2144 
2145  // This transform increases uses of N0, so freeze it to be safe.
2146  // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2147  if (isIdentityConstantForOpcode(Opcode, TVal)) {
2148  SDValue F0 = DAG.getFreeze(N0);
2149  SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2150  return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2151  }
2152  // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2153  if (isIdentityConstantForOpcode(Opcode, FVal)) {
2154  SDValue F0 = DAG.getFreeze(N0);
2155  SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2156  return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2157  }
2158 
2159  return SDValue();
2160 }
2161 
// Fold a binop whose operand is a select-of-constants into a select of two
// constant-folded binop results, e.g.
//   add (select Cond, CT, CF), CBO --> select Cond, CT+CBO, CF+CBO.
// Also tries the identity-constant vselect inversion first.
// NOTE(review): several multi-line if-condition continuations in this
// function (constant-FP checks paired with the isConstantOrConstantVector
// calls) are missing from this extract.
2162 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2163  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2164  "Unexpected binary operator");
2165 
2166  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2167  auto BinOpcode = BO->getOpcode();
2168  EVT VT = BO->getValueType(0);
2169  if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2170  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2171  return Sel;
2172 
// For commutative binops, also try with the operands swapped.
2173  if (TLI.isCommutativeBinOp(BO->getOpcode()))
2174  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2175  return Sel;
2176  }
2177 
2178  // Don't do this unless the old select is going away. We want to eliminate the
2179  // binary operator, not replace a binop with a select.
2180  // TODO: Handle ISD::SELECT_CC.
2181  unsigned SelOpNo = 0;
2182  SDValue Sel = BO->getOperand(0);
2183  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2184  SelOpNo = 1;
2185  Sel = BO->getOperand(1);
2186  }
2187 
2188  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2189  return SDValue();
2190 
2191  SDValue CT = Sel.getOperand(1);
2192  if (!isConstantOrConstantVector(CT, true) &&
2194  return SDValue();
2195 
2196  SDValue CF = Sel.getOperand(2);
2197  if (!isConstantOrConstantVector(CF, true) &&
2199  return SDValue();
2200 
2201  // Bail out if any constants are opaque because we can't constant fold those.
2202  // The exception is "and" and "or" with either 0 or -1 in which case we can
2203  // propagate non constant operands into select. I.e.:
2204  // and (select Cond, 0, -1), X --> select Cond, 0, X
2205  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2206  bool CanFoldNonConst =
2207  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2210 
2211  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2212  if (!CanFoldNonConst &&
2213  !isConstantOrConstantVector(CBO, true) &&
2215  return SDValue();
2216 
2217  // We have a select-of-constants followed by a binary operator with a
2218  // constant. Eliminate the binop by pulling the constant math into the select.
2219  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2220  SDLoc DL(Sel);
// SelOpNo records which binop operand the select was, so the new binops
// preserve the original operand order (matters for non-commutative ops).
2221  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2222  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2223  if (!CanFoldNonConst && !NewCT.isUndef() &&
2224  !isConstantOrConstantVector(NewCT, true) &&
2226  return SDValue();
2227 
2228  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2229  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2230  if (!CanFoldNonConst && !NewCF.isUndef() &&
2231  !isConstantOrConstantVector(NewCF, true) &&
2233  return SDValue();
2234 
2235  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2236  SelectOp->setFlags(BO->getFlags());
2237  return SelectOp;
2238 }
2239 
// Fold add/sub of a zext'd inverted-low-bit compare with a constant:
//   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
//   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
// NOTE(review): the function signature line is missing from this extract.
2241  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2242  "Expecting add or sub");
2243 
2244  // Match a constant operand and a zext operand for the math instruction:
2245  // add Z, C
2246  // sub C, Z
2247  bool IsAdd = N->getOpcode() == ISD::ADD;
2248  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2249  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2250  auto *CN = dyn_cast<ConstantSDNode>(C);
2251  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2252  return SDValue();
2253 
2254  // Match the zext operand as a setcc of a boolean.
2255  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2256  Z.getOperand(0).getValueType() != MVT::i1)
2257  return SDValue();
2258 
2259  // Match the compare as: setcc (X & 1), 0, eq.
2260  SDValue SetCC = Z.getOperand(0);
2261  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2262  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2263  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2264  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2265  return SDValue();
2266 
2267  // We are adding/subtracting a constant and an inverted low bit. Turn that
2268  // into a subtract/add of the low bit with incremented/decremented constant:
2269  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2270  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2271  EVT VT = C.getValueType();
2272  SDLoc DL(N);
// LowBit is (X & 1), extended/truncated to the result type.
2273  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2274  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2275  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2276  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2277 }
2278 
2279 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2280 /// a shift and add with a different constant.
// NOTE(review): the function signature line is missing from this extract.
2282  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2283  "Expecting add or sub");
2284 
2285  // We need a constant operand for the add/sub, and the other operand is a
2286  // logical shift right: add (srl), C or sub C, (srl).
2287  bool IsAdd = N->getOpcode() == ISD::ADD;
2288  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2289  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2290  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2291  ShiftOp.getOpcode() != ISD::SRL)
2292  return SDValue();
2293 
2294  // The shift must be of a 'not' value.
2295  SDValue Not = ShiftOp.getOperand(0);
2296  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2297  return SDValue();
2298 
2299  // The shift must be moving the sign bit to the least-significant-bit.
2300  EVT VT = ShiftOp.getValueType();
2301  SDValue ShAmt = ShiftOp.getOperand(1);
2302  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2303  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2304  return SDValue();
2305 
2306  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2307  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2308  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2309  SDLoc DL(N);
2310  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2311  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
// Fold C +/- 1 at compile time; if that fails (e.g. opaque constants),
// give up rather than emit a non-constant adjustment.
2312  if (SDValue NewC =
2313  DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2314  {ConstantOp, DAG.getConstant(1, DL, VT)}))
2315  return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2316  return SDValue();
2317 }
2318 
2319 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2320 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2321 /// are no common bits set in the operands).
// NOTE(review): several single-line if-conditions in this function are
// missing from this extract (doc-export artifact); the indented
// return/statement lines below those gaps are their bodies.
2322 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2323  SDValue N0 = N->getOperand(0);
2324  SDValue N1 = N->getOperand(1);
2325  EVT VT = N0.getValueType();
2326  SDLoc DL(N);
2327 
2328  // fold (add x, undef) -> undef
2329  if (N0.isUndef())
2330  return N0;
2331  if (N1.isUndef())
2332  return N1;
2333 
2334  // fold (add c1, c2) -> c1+c2
2335  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2336  return C;
2337 
2338  // canonicalize constant to RHS
// (guarding condition missing from this extract)
2341  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2342 
2343  // fold vector ops
2344  if (VT.isVector()) {
2345  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2346  return FoldedVOp;
2347 
2348  // fold (add x, 0) -> x, vector edition
// (guarding condition missing from this extract)
2350  return N0;
2351  }
2352 
2353  // fold (add x, 0) -> x
2354  if (isNullConstant(N1))
2355  return N0;
2356 
2357  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2358  // fold ((A-c1)+c2) -> (A+(c2-c1))
2359  if (N0.getOpcode() == ISD::SUB &&
2360  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2361  SDValue Sub =
2362  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2363  assert(Sub && "Constant folding failed");
2364  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2365  }
2366 
2367  // fold ((c1-A)+c2) -> (c1+c2)-A
2368  if (N0.getOpcode() == ISD::SUB &&
2369  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2370  SDValue Add =
2371  DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2372  assert(Add && "Constant folding failed");
2373  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2374  }
2375 
2376  // add (sext i1 X), 1 -> zext (not i1 X)
2377  // We don't transform this pattern:
2378  // add (zext i1 X), -1 -> sext (not i1 X)
2379  // because most (?) targets generate better code for the zext form.
2380  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2381  isOneOrOneSplat(N1)) {
2382  SDValue X = N0.getOperand(0);
2383  if ((!LegalOperations ||
2384  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2385  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2386  X.getScalarValueSizeInBits() == 1) {
2387  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2388  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2389  }
2390  }
2391 
2392  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2393  // equivalent to (add x, c0).
2394  if (N0.getOpcode() == ISD::OR &&
2395  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2396  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2397  if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2398  {N1, N0.getOperand(1)}))
2399  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2400  }
2401  }
2402 
2403  if (SDValue NewSel = foldBinOpIntoSelect(N))
2404  return NewSel;
2405 
2406  // reassociate add
2407  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2408  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2409  return RADD;
2410 
2411  // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2412  // equivalent to (add x, c).
2413  auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2414  if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2415  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2416  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2417  return DAG.getNode(ISD::ADD, DL, VT,
2418  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2419  N0.getOperand(1));
2420  }
2421  return SDValue();
2422  };
// Try both operand orders, since the OR may be on either side.
2423  if (SDValue Add = ReassociateAddOr(N0, N1))
2424  return Add;
2425  if (SDValue Add = ReassociateAddOr(N1, N0))
2426  return Add;
2427  }
2428  // fold ((0-A) + B) -> B-A
2429  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2430  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2431 
2432  // fold (A + (0-B)) -> A-B
2433  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2434  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2435 
2436  // fold (A+(B-A)) -> B
2437  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2438  return N1.getOperand(0);
2439 
2440  // fold ((B-A)+A) -> B
2441  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2442  return N0.getOperand(0);
2443 
2444  // fold ((A-B)+(C-A)) -> (C-B)
2445  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2446  N0.getOperand(0) == N1.getOperand(1))
2447  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2448  N0.getOperand(1));
2449 
2450  // fold ((A-B)+(B-C)) -> (A-C)
2451  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2452  N0.getOperand(1) == N1.getOperand(0))
2453  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2454  N1.getOperand(1));
2455 
2456  // fold (A+(B-(A+C))) to (B-C)
2457  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2458  N0 == N1.getOperand(1).getOperand(0))
2459  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2460  N1.getOperand(1).getOperand(1));
2461 
2462  // fold (A+(B-(C+A))) to (B-C)
2463  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2464  N0 == N1.getOperand(1).getOperand(1))
2465  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2466  N1.getOperand(1).getOperand(0));
2467 
2468  // fold (A+((B-A)+or-C)) to (B+or-C)
2469  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2470  N1.getOperand(0).getOpcode() == ISD::SUB &&
2471  N0 == N1.getOperand(0).getOperand(1))
2472  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2473  N1.getOperand(1));
2474 
2475  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2476  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2477  SDValue N00 = N0.getOperand(0);
2478  SDValue N01 = N0.getOperand(1);
2479  SDValue N10 = N1.getOperand(0);
2480  SDValue N11 = N1.getOperand(1);
2481 
// (guarding condition — the "A or C is constant" check — missing from
// this extract)
2483  return DAG.getNode(ISD::SUB, DL, VT,
2484  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2485  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2486  }
2487 
2488  // fold (add (umax X, C), -C) --> (usubsat X, C)
2489  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
// Matches when the umax constant and the addend are exact negations
// (both-undef lanes are also accepted).
2490  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2491  return (!Max && !Op) ||
2492  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2493  };
2494  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2495  /*AllowUndefs*/ true))
2496  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2497  N0.getOperand(1));
2498  }
2499 
2500  if (SimplifyDemandedBits(SDValue(N, 0)))
2501  return SDValue(N, 0);
2502 
2503  if (isOneOrOneSplat(N1)) {
2504  // fold (add (xor a, -1), 1) -> (sub 0, a)
2505  if (isBitwiseNot(N0))
2506  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2507  N0.getOperand(0));
2508 
2509  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2510  if (N0.getOpcode() == ISD::ADD) {
2511  SDValue A, Xor;
2512 
2513  if (isBitwiseNot(N0.getOperand(0))) {
2514  A = N0.getOperand(1);
2515  Xor = N0.getOperand(0);
2516  } else if (isBitwiseNot(N0.getOperand(1))) {
2517  A = N0.getOperand(0);
2518  Xor = N0.getOperand(1);
2519  }
2520 
2521  if (Xor)
2522  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2523  }
2524 
2525  // Look for:
2526  // add (add x, y), 1
2527  // And if the target does not like this form then turn into:
2528  // sub y, (xor x, -1)
2529  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2530  N0.getOpcode() == ISD::ADD) {
2531  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2532  DAG.getAllOnesConstant(DL, VT));
2533  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2534  }
2535  }
2536 
2537  // (x - y) + -1 -> add (xor y, -1), x
2538  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
// (remainder of the guarding condition — the all-ones check on N1 —
// missing from this extract)
2540  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2541  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2542  }
2543 
// Finally, try the shared commutative ADD-like folds in both orders.
2544  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2545  return Combined;
2546 
2547  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2548  return Combined;
2549 
2550  return SDValue();
2551 }
2552 
2553 SDValue DAGCombiner::visitADD(SDNode *N) {
2554  SDValue N0 = N->getOperand(0);
2555  SDValue N1 = N->getOperand(1);
2556  EVT VT = N0.getValueType();
2557  SDLoc DL(N);
2558 
2559  if (SDValue Combined = visitADDLike(N))
2560  return Combined;
2561 
2562  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2563  return V;
2564 
2565  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2566  return V;
2567 
2568  // fold (a+b) -> (a|b) iff a and b share no bits.
2569  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2570  DAG.haveNoCommonBitsSet(N0, N1))
2571  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2572 
2573  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2574  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2575  const APInt &C0 = N0->getConstantOperandAPInt(0);
2576  const APInt &C1 = N1->getConstantOperandAPInt(0);
2577  return DAG.getVScale(DL, VT, C0 + C1);
2578  }
2579 
2580  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2581  if ((N0.getOpcode() == ISD::ADD) &&
2582  (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2583  (N1.getOpcode() == ISD::VSCALE)) {
2584  const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2585  const APInt &VS1 = N1->getConstantOperandAPInt(0);
2586  SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2587  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2588  }
2589 
2590  // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2591  if (N0.getOpcode() == ISD::STEP_VECTOR &&
2592  N1.getOpcode() == ISD::STEP_VECTOR) {
2593  const APInt &C0 = N0->getConstantOperandAPInt(0);
2594  const APInt &C1 = N1->getConstantOperandAPInt(0);
2595  APInt NewStep = C0 + C1;
2596  return DAG.getStepVector(DL, VT, NewStep);
2597  }
2598 
2599  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2600  if ((N0.getOpcode() == ISD::ADD) &&
2601  (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2602  (N1.getOpcode() == ISD::STEP_VECTOR)) {
2603  const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2604  const APInt &SV1 = N1->getConstantOperandAPInt(0);
2605  APInt NewStep = SV0 + SV1;
2606  SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2607  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2608  }
2609 
2610  return SDValue();
2611 }
2612 
2613 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2614  unsigned Opcode = N->getOpcode();
2615  SDValue N0 = N->getOperand(0);
2616  SDValue N1 = N->getOperand(1);
2617  EVT VT = N0.getValueType();
2618  SDLoc DL(N);
2619 
2620  // fold (add_sat x, undef) -> -1
2621  if (N0.isUndef() || N1.isUndef())
2622  return DAG.getAllOnesConstant(DL, VT);
2623 
2624  // fold (add_sat c1, c2) -> c3
2625  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2626  return C;
2627 
2628  // canonicalize constant to RHS
2631  return DAG.getNode(Opcode, DL, VT, N1, N0);
2632 
2633  // fold vector ops
2634  if (VT.isVector()) {
2635  // TODO SimplifyVBinOp
2636 
2637  // fold (add_sat x, 0) -> x, vector edition
2639  return N0;
2640  }
2641 
2642  // fold (add_sat x, 0) -> x
2643  if (isNullConstant(N1))
2644  return N0;
2645 
2646  // If it cannot overflow, transform into an add.
2647  if (Opcode == ISD::UADDSAT)
2648  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2649  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2650 
2651  return SDValue();
2652 }
2653 
2654 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2655  bool Masked = false;
2656 
2657  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2658  while (true) {
2659  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2660  V = V.getOperand(0);
2661  continue;
2662  }
2663 
2664  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2665  Masked = true;
2666  V = V.getOperand(0);
2667  continue;
2668  }
2669 
2670  break;
2671  }
2672 
2673  // If this is not a carry, return.
2674  if (V.getResNo() != 1)
2675  return SDValue();
2676 
2677  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2678  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2679  return SDValue();
2680 
2681  EVT VT = V.getNode()->getValueType(0);
2682  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2683  return SDValue();
2684 
2685  // If the result is masked, then no matter what kind of bool it is we can
2686  // return. If it isn't, then we need to make sure the bool type is either 0 or
2687  // 1 and not other values.
2688  if (Masked ||
2689  TLI.getBooleanContents(V.getValueType()) ==
2691  return V;
2692 
2693  return SDValue();
2694 }
2695 
2696 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2697 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2698 /// the opcode and bypass the mask operation.
2699 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2700  SelectionDAG &DAG, const SDLoc &DL) {
2701  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2702  return SDValue();
2703 
2704  EVT VT = N0.getValueType();
2705  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2706  return SDValue();
2707 
2708  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2709  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2710  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2711 }
2712 
2713 /// Helper for doing combines based on N0 and N1 being added to each other.
2714 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2715  SDNode *LocReference) {
2716  EVT VT = N0.getValueType();
2717  SDLoc DL(LocReference);
2718 
2719  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2720  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2722  return DAG.getNode(ISD::SUB, DL, VT, N0,
2723  DAG.getNode(ISD::SHL, DL, VT,
2724  N1.getOperand(0).getOperand(1),
2725  N1.getOperand(1)));
2726 
2727  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2728  return V;
2729 
2730  // Look for:
2731  // add (add x, 1), y
2732  // And if the target does not like this form then turn into:
2733  // sub y, (xor x, -1)
2734  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2735  N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2736  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2737  DAG.getAllOnesConstant(DL, VT));
2738  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2739  }
2740 
2741  // Hoist one-use subtraction by non-opaque constant:
2742  // (x - C) + y -> (x + y) - C
2743  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2744  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2745  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2746  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2747  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2748  }
2749  // Hoist one-use subtraction from non-opaque constant:
2750  // (C - x) + y -> (y - x) + C
2751  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2752  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2753  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2754  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2755  }
2756 
2757  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2758  // rather than 'add 0/-1' (the zext should get folded).
2759  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2760  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2761  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2763  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2764  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2765  }
2766 
2767  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2768  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2769  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2770  if (TN->getVT() == MVT::i1) {
2771  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2772  DAG.getConstant(1, DL, VT));
2773  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2774  }
2775  }
2776 
2777  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2778  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2779  N1.getResNo() == 0)
2780  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2781  N0, N1.getOperand(0), N1.getOperand(2));
2782 
2783  // (add X, Carry) -> (addcarry X, 0, Carry)
2785  if (SDValue Carry = getAsCarry(TLI, N1))
2786  return DAG.getNode(ISD::ADDCARRY, DL,
2787  DAG.getVTList(VT, Carry.getValueType()), N0,
2788  DAG.getConstant(0, DL, VT), Carry);
2789 
2790  return SDValue();
2791 }
2792 
2793 SDValue DAGCombiner::visitADDC(SDNode *N) {
2794  SDValue N0 = N->getOperand(0);
2795  SDValue N1 = N->getOperand(1);
2796  EVT VT = N0.getValueType();
2797  SDLoc DL(N);
2798 
2799  // If the flag result is dead, turn this into an ADD.
2800  if (!N->hasAnyUseOfValue(1))
2801  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2803 
2804  // canonicalize constant to RHS.
2805  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2806  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2807  if (N0C && !N1C)
2808  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2809 
2810  // fold (addc x, 0) -> x + no carry out
2811  if (isNullConstant(N1))
2812  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2813  DL, MVT::Glue));
2814 
2815  // If it cannot overflow, transform into an add.
2816  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2817  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2819 
2820  return SDValue();
2821 }
2822 
2823 /**
2824  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2825  * then the flip also occurs if computing the inverse is the same cost.
2826  * This function returns an empty SDValue in case it cannot flip the boolean
2827  * without increasing the cost of the computation. If you want to flip a boolean
2828  * no matter what, use DAG.getLogicalNOT.
2829  */
2831  const TargetLowering &TLI,
2832  bool Force) {
2833  if (Force && isa<ConstantSDNode>(V))
2834  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2835 
2836  if (V.getOpcode() != ISD::XOR)
2837  return SDValue();
2838 
2839  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2840  if (!Const)
2841  return SDValue();
2842 
2843  EVT VT = V.getValueType();
2844 
2845  bool IsFlip = false;
2846  switch(TLI.getBooleanContents(VT)) {
2848  IsFlip = Const->isOne();
2849  break;
2851  IsFlip = Const->isAllOnes();
2852  break;
2854  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2855  break;
2856  }
2857 
2858  if (IsFlip)
2859  return V.getOperand(0);
2860  if (Force)
2861  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2862  return SDValue();
2863 }
2864 
2865 SDValue DAGCombiner::visitADDO(SDNode *N) {
2866  SDValue N0 = N->getOperand(0);
2867  SDValue N1 = N->getOperand(1);
2868  EVT VT = N0.getValueType();
2869  bool IsSigned = (ISD::SADDO == N->getOpcode());
2870 
2871  EVT CarryVT = N->getValueType(1);
2872  SDLoc DL(N);
2873 
2874  // If the flag result is dead, turn this into an ADD.
2875  if (!N->hasAnyUseOfValue(1))
2876  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2877  DAG.getUNDEF(CarryVT));
2878 
2879  // canonicalize constant to RHS.
2882  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2883 
2884  // fold (addo x, 0) -> x + no carry out
2885  if (isNullOrNullSplat(N1))
2886  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2887 
2888  if (!IsSigned) {
2889  // If it cannot overflow, transform into an add.
2890  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2891  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2892  DAG.getConstant(0, DL, CarryVT));
2893 
2894  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2895  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2896  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2897  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2898  return CombineTo(
2899  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2900  }
2901 
2902  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2903  return Combined;
2904 
2905  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2906  return Combined;
2907  }
2908 
2909  return SDValue();
2910 }
2911 
2912 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2913  EVT VT = N0.getValueType();
2914  if (VT.isVector())
2915  return SDValue();
2916 
2917  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2918  // If Y + 1 cannot overflow.
2919  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2920  SDValue Y = N1.getOperand(0);
2921  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2923  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2924  N1.getOperand(2));
2925  }
2926 
2927  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2929  if (SDValue Carry = getAsCarry(TLI, N1))
2930  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2931  DAG.getConstant(0, SDLoc(N), VT), Carry);
2932 
2933  return SDValue();
2934 }
2935 
2936 SDValue DAGCombiner::visitADDE(SDNode *N) {
2937  SDValue N0 = N->getOperand(0);
2938  SDValue N1 = N->getOperand(1);
2939  SDValue CarryIn = N->getOperand(2);
2940 
2941  // canonicalize constant to RHS
2942  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2943  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2944  if (N0C && !N1C)
2945  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2946  N1, N0, CarryIn);
2947 
2948  // fold (adde x, y, false) -> (addc x, y)
2949  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2950  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2951 
2952  return SDValue();
2953 }
2954 
2955 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2956  SDValue N0 = N->getOperand(0);
2957  SDValue N1 = N->getOperand(1);
2958  SDValue CarryIn = N->getOperand(2);
2959  SDLoc DL(N);
2960 
2961  // canonicalize constant to RHS
2962  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2963  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2964  if (N0C && !N1C)
2965  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2966 
2967  // fold (addcarry x, y, false) -> (uaddo x, y)
2968  if (isNullConstant(CarryIn)) {
2969  if (!LegalOperations ||
2970  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2971  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2972  }
2973 
2974  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2975  if (isNullConstant(N0) && isNullConstant(N1)) {
2976  EVT VT = N0.getValueType();
2977  EVT CarryVT = CarryIn.getValueType();
2978  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2979  AddToWorklist(CarryExt.getNode());
2980  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2981  DAG.getConstant(1, DL, VT)),
2982  DAG.getConstant(0, DL, CarryVT));
2983  }
2984 
2985  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2986  return Combined;
2987 
2988  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2989  return Combined;
2990 
2991  return SDValue();
2992 }
2993 
2994 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2995  SDValue N0 = N->getOperand(0);
2996  SDValue N1 = N->getOperand(1);
2997  SDValue CarryIn = N->getOperand(2);
2998  SDLoc DL(N);
2999 
3000  // canonicalize constant to RHS
3001  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3002  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3003  if (N0C && !N1C)
3004  return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3005 
3006  // fold (saddo_carry x, y, false) -> (saddo x, y)
3007  if (isNullConstant(CarryIn)) {
3008  if (!LegalOperations ||
3009  TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3010  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3011  }
3012 
3013  return SDValue();
3014 }
3015 
3016 /**
3017  * If we are facing some sort of diamond carry propapagtion pattern try to
3018  * break it up to generate something like:
3019  * (addcarry X, 0, (addcarry A, B, Z):Carry)
3020  *
3021  * The end result is usually an increase in operation required, but because the
3022  * carry is now linearized, other tranforms can kick in and optimize the DAG.
3023  *
3024  * Patterns typically look something like
3025  * (uaddo A, B)
3026  * / \
3027  * Carry Sum
3028  * | \
3029  * | (addcarry *, 0, Z)
3030  * | /
3031  * \ Carry
3032  * | /
3033  * (addcarry X, *, *)
3034  *
3035  * But numerous variation exist. Our goal is to identify A, B, X and Z and
3036  * produce a combine with a single path for carry propagation.
3037  */
3039  SDValue X, SDValue Carry0, SDValue Carry1,
3040  SDNode *N) {
3041  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3042  return SDValue();
3043  if (Carry1.getOpcode() != ISD::UADDO)
3044  return SDValue();
3045 
3046  SDValue Z;
3047 
3048  /**
3049  * First look for a suitable Z. It will present itself in the form of
3050  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3051  */
3052  if (Carry0.getOpcode() == ISD::ADDCARRY &&
3053  isNullConstant(Carry0.getOperand(1))) {
3054  Z = Carry0.getOperand(2);
3055  } else if (Carry0.getOpcode() == ISD::UADDO &&
3056  isOneConstant(Carry0.getOperand(1))) {
3057  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3058  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3059  } else {
3060  // We couldn't find a suitable Z.
3061  return SDValue();
3062  }
3063 
3064 
3065  auto cancelDiamond = [&](SDValue A,SDValue B) {
3066  SDLoc DL(N);
3067  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3068  Combiner.AddToWorklist(NewY.getNode());
3069  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3070  DAG.getConstant(0, DL, X.getValueType()),
3071  NewY.getValue(1));
3072  };
3073 
3074  /**
3075  * (uaddo A, B)
3076  * |
3077  * Sum
3078  * |
3079  * (addcarry *, 0, Z)
3080  */
3081  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3082  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3083  }
3084 
3085  /**
3086  * (addcarry A, 0, Z)
3087  * |
3088  * Sum
3089  * |
3090  * (uaddo *, B)
3091  */
3092  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3093  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3094  }
3095 
3096  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3097  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3098  }
3099 
3100  return SDValue();
3101 }
3102 
3103 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3104 // match patterns like:
3105 //
3106 // (uaddo A, B) CarryIn
3107 // | \ |
3108 // | \ |
3109 // PartialSum PartialCarryOutX /
3110 // | | /
3111 // | ____|____________/
3112 // | / |
3113 // (uaddo *, *) \________
3114 // | \ \
3115 // | \ |
3116 // | PartialCarryOutY |
3117 // | \ |
3118 // | \ /
3119 // AddCarrySum | ______/
3120 // | /
3121 // CarryOut = (or *, *)
3122 //
3123 // And generate ADDCARRY (or SUBCARRY) with two result values:
3124 //
3125 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3126 //
3127 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3128 // a single path for carry/borrow out propagation:
3130  SDValue Carry0, SDValue Carry1, SDNode *N) {
3131  if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3132  return SDValue();
3133  unsigned Opcode = Carry0.getOpcode();
3134  if (Opcode != Carry1.getOpcode())
3135  return SDValue();
3136  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3137  return SDValue();
3138 
3139  // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3140  // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3141  // the above ASCII art.)
3142  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3143  Carry1.getOperand(1) != Carry0.getValue(0))
3144  std::swap(Carry0, Carry1);
3145  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3146  Carry1.getOperand(1) != Carry0.getValue(0))
3147  return SDValue();
3148 
3149  // The carry in value must be on the righthand side for subtraction.
3150  unsigned CarryInOperandNum =
3151  Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3152  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3153  return SDValue();
3154  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3155 
3156  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3157  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3158  return SDValue();
3159 
3160  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3161  // TODO: make getAsCarry() aware of how partial carries are merged.
3162  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3163  return SDValue();
3164  CarryIn = CarryIn.getOperand(0);
3165  if (CarryIn.getValueType() != MVT::i1)
3166  return SDValue();
3167 
3168  SDLoc DL(N);
3169  SDValue Merged =
3170  DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3171  Carry0.getOperand(1), CarryIn);
3172 
3173  // Please note that because we have proven that the result of the UADDO/USUBO
3174  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3175  // therefore prove that if the first UADDO/USUBO overflows, the second
3176  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3177  // maximum value.
3178  //
3179  // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3180  // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3181  //
3182  // This is important because it means that OR and XOR can be used to merge
3183  // carry flags; and that AND can return a constant zero.
3184  //
3185  // TODO: match other operations that can merge flags (ADD, etc)
3186  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3187  if (N->getOpcode() == ISD::AND)
3188  return DAG.getConstant(0, DL, MVT::i1);
3189  return Merged.getValue(1);
3190 }
3191 
3192 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3193  SDNode *N) {
3194  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3195  if (isBitwiseNot(N0))
3196  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3197  SDLoc DL(N);
3198  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3199  N0.getOperand(0), NotC);
3200  return CombineTo(
3201  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3202  }
3203 
3204  // Iff the flag result is dead:
3205  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3206  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3207  // or the dependency between the instructions.
3208  if ((N0.getOpcode() == ISD::ADD ||
3209  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3210  N0.getValue(1) != CarryIn)) &&
3211  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3212  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3213  N0.getOperand(0), N0.getOperand(1), CarryIn);
3214 
3215  /**
3216  * When one of the addcarry argument is itself a carry, we may be facing
3217  * a diamond carry propagation. In which case we try to transform the DAG
3218  * to ensure linear carry propagation if that is possible.
3219  */
3220  if (auto Y = getAsCarry(TLI, N1)) {
3221  // Because both are carries, Y and Z can be swapped.
3222  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3223  return R;
3224  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3225  return R;
3226  }
3227 
3228  return SDValue();
3229 }
3230 
3231 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3232 // clamp/truncation if necessary.
3234  SDValue RHS, SelectionDAG &DAG,
3235  const SDLoc &DL) {
3236  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3237  "Illegal truncation");
3238 
3239  if (DstVT == SrcVT)
3240  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3241 
3242  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3243  // clamping RHS.
3244  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3245  DstVT.getScalarSizeInBits());
3246  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3247  return SDValue();
3248 
3249  SDValue SatLimit =
3251  DstVT.getScalarSizeInBits()),
3252  DL, SrcVT);
3253  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3254  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3255  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3256  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3257 }
3258 
3259 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3260 // usubsat(a,b), optionally as a truncated type.
3261 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3262  if (N->getOpcode() != ISD::SUB ||
3263  !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3264  return SDValue();
3265 
3266  EVT SubVT = N->getValueType(0);
3267  SDValue Op0 = N->getOperand(0);
3268  SDValue Op1 = N->getOperand(1);
3269 
3270  // Try to find umax(a,b) - b or a - umin(a,b) patterns
3271  // they may be converted to usubsat(a,b).
3272  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3273  SDValue MaxLHS = Op0.getOperand(0);
3274  SDValue MaxRHS = Op0.getOperand(1);
3275  if (MaxLHS == Op1)
3276  return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3277  if (MaxRHS == Op1)
3278  return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3279  }
3280 
3281  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3282  SDValue MinLHS = Op1.getOperand(0);
3283  SDValue MinRHS = Op1.getOperand(1);
3284  if (MinLHS == Op0)
3285  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3286  if (MinRHS == Op0)
3287  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3288  }
3289 
3290  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3291  if (Op1.getOpcode() == ISD::TRUNCATE &&
3292  Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3293  Op1.getOperand(0).hasOneUse()) {
3294  SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3295  SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3296  if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3297  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3298  DAG, SDLoc(N));
3299  if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3300  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3301  DAG, SDLoc(N));
3302  }
3303 
3304  return SDValue();
3305 }
3306 
3307 // Since it may not be valid to emit a fold to zero for vector initializers
3308 // check if we can before folding.
3309 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3310  SelectionDAG &DAG, bool LegalOperations) {
3311  if (!VT.isVector())
3312  return DAG.getConstant(0, DL, VT);
3313  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3314  return DAG.getConstant(0, DL, VT);
3315  return SDValue();
3316 }
3317 
3318 SDValue DAGCombiner::visitSUB(SDNode *N) {
3319  SDValue N0 = N->getOperand(0);
3320  SDValue N1 = N->getOperand(1);
3321  EVT VT = N0.getValueType();
3322  SDLoc DL(N);
3323 
3324  // fold (sub x, x) -> 0
3325  // FIXME: Refactor this and xor and other similar operations together.
3326  if (N0 == N1)
3327  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3328 
3329  // fold (sub c1, c2) -> c3
3330  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3331  return C;
3332 
3333  // fold vector ops
3334  if (VT.isVector()) {
3335  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3336  return FoldedVOp;
3337 
3338  // fold (sub x, 0) -> x, vector edition
3340  return N0;
3341  }
3342 
3343  if (SDValue NewSel = foldBinOpIntoSelect(N))
3344  return NewSel;
3345 
3347 
3348  // fold (sub x, c) -> (add x, -c)
3349  if (N1C) {
3350  return DAG.getNode(ISD::ADD, DL, VT, N0,
3351  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3352  }
3353 
3354  if (isNullOrNullSplat(N0)) {
3355  unsigned BitWidth = VT.getScalarSizeInBits();
3356  // Right-shifting everything out but the sign bit followed by negation is
3357  // the same as flipping arithmetic/logical shift type without the negation:
3358  // -(X >>u 31) -> (X >>s 31)
3359  // -(X >>s 31) -> (X >>u 31)
3360  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3361  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3362  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3363  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3364  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3365  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3366  }
3367  }
3368 
3369  // 0 - X --> 0 if the sub is NUW.
3370  if (N->getFlags().hasNoUnsignedWrap())
3371  return N0;
3372 
3374  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3375  // N1 must be 0 because negating the minimum signed value is undefined.
3376  if (N->getFlags().hasNoSignedWrap())
3377  return N0;
3378 
3379  // 0 - X --> X if X is 0 or the minimum signed value.
3380  return N1;
3381  }
3382 
3383  // Convert 0 - abs(x).
3384  if (N1->getOpcode() == ISD::ABS &&
3386  if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3387  return Result;
3388 
3389  // Fold neg(splat(neg(x)) -> splat(x)
3390  if (VT.isVector()) {
3391  SDValue N1S = DAG.getSplatValue(N1, true);
3392  if (N1S && N1S.getOpcode() == ISD::SUB &&
3393  isNullConstant(N1S.getOperand(0))) {
3394  if (VT.isScalableVector())
3395  return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3396  return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3397  }
3398  }
3399  }
3400 
3401  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3402  if (isAllOnesOrAllOnesSplat(N0))
3403  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3404 
3405  // fold (A - (0-B)) -> A+B
3406  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3407  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3408 
3409  // fold A-(A-B) -> B
3410  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3411  return N1.getOperand(1);
3412 
3413  // fold (A+B)-A -> B
3414  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3415  return N0.getOperand(1);
3416 
3417  // fold (A+B)-B -> A
3418  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3419  return N0.getOperand(0);
3420 
3421  // fold (A+C1)-C2 -> A+(C1-C2)
3422  if (N0.getOpcode() == ISD::ADD &&
3423  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3424  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3425  SDValue NewC =
3426  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3427  assert(NewC && "Constant folding failed");
3428  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3429  }
3430 
3431  // fold C2-(A+C1) -> (C2-C1)-A
3432  if (N1.getOpcode() == ISD::ADD) {
3433  SDValue N11 = N1.getOperand(1);
3434  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3435  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3436  SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3437  assert(NewC && "Constant folding failed");
3438  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3439  }
3440  }
3441 
3442  // fold (A-C1)-C2 -> A-(C1+C2)
3443  if (N0.getOpcode() == ISD::SUB &&
3444  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3445  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3446  SDValue NewC =
3447  DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3448  assert(NewC && "Constant folding failed");
3449  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3450  }
3451 
3452  // fold (c1-A)-c2 -> (c1-c2)-A
3453  if (N0.getOpcode() == ISD::SUB &&
3454  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3455  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3456  SDValue NewC =
3457  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3458  assert(NewC && "Constant folding failed");
3459  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3460  }
3461 
3462  // fold ((A+(B+or-C))-B) -> A+or-C
3463  if (N0.getOpcode() == ISD::ADD &&
3464  (N0.getOperand(1).getOpcode() == ISD::SUB ||
3465  N0.getOperand(1).getOpcode() == ISD::ADD) &&
3466  N0.getOperand(1).getOperand(0) == N1)
3467  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3468  N0.getOperand(1).getOperand(1));
3469 
3470  // fold ((A+(C+B))-B) -> A+C
3471  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3472  N0.getOperand(1).getOperand(1) == N1)
3473  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3474  N0.getOperand(1).getOperand(0));
3475 
3476  // fold ((A-(B-C))-C) -> A-B
3477  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3478  N0.getOperand(1).getOperand(1) == N1)
3479  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3480  N0.getOperand(1).getOperand(0));
3481 
3482  // fold (A-(B-C)) -> A+(C-B)
3483  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3484  return DAG.getNode(ISD::ADD, DL, VT, N0,
3485  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3486  N1.getOperand(0)));
3487 
3488  // A - (A & B) -> A & (~B)
3489  if (N1.getOpcode() == ISD::AND) {
3490  SDValue A = N1.getOperand(0);
3491  SDValue B = N1.getOperand(1);
3492  if (A != N0)
3493  std::swap(A, B);
3494  if (A == N0 &&
3495  (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3496  SDValue InvB =
3497  DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3498  return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3499  }
3500  }
3501 
3502  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3503  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3504  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3506  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3507  N1.getOperand(0).getOperand(1),
3508  N1.getOperand(1));
3509  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3510  }
3511  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3513  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3514  N1.getOperand(0),
3515  N1.getOperand(1).getOperand(1));
3516  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3517  }
3518  }
3519 
3520  // If either operand of a sub is undef, the result is undef
3521  if (N0.isUndef())
3522  return N0;
3523  if (N1.isUndef())
3524  return N1;
3525 
3526  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3527  return V;
3528 
3529  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3530  return V;
3531 
3532  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3533  return V;
3534 
3535  if (SDValue V = foldSubToUSubSat(VT, N))
3536  return V;
3537 
3538  // (x - y) - 1 -> add (xor y, -1), x
3539  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3540  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3541  DAG.getAllOnesConstant(DL, VT));
3542  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3543  }
3544 
3545  // Look for:
3546  // sub y, (xor x, -1)
3547  // And if the target does not like this form then turn into:
3548  // add (add x, y), 1
3549  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3550  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3551  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3552  }
3553 
3554  // Hoist one-use addition by non-opaque constant:
3555  // (x + C) - y -> (x - y) + C
3556  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3557  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3558  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3559  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3560  }
3561  // y - (x + C) -> (y - x) - C
3562  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3563  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3564  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3565  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3566  }
3567  // (x - C) - y -> (x - y) - C
3568  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3569  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3570  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3571  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3572  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3573  }
3574  // (C - x) - y -> C - (x + y)
3575  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3576  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3577  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3578  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3579  }
3580 
3581  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3582  // rather than 'sub 0/1' (the sext should get folded).
3583  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3584  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3585  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3586  TLI.getBooleanContents(VT) ==
3588  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3589  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3590  }
3591 
3592  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3593  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3594  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3595  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3596  SDValue S0 = N1.getOperand(0);
3597  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3599  if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3600  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3601  }
3602  }
3603 
3604  // If the relocation model supports it, consider symbol offsets.
3605  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3606  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3607  // fold (sub Sym, c) -> Sym-c
3608  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3609  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3610  GA->getOffset() -
3611  (uint64_t)N1C->getSExtValue());
3612  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3613  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3614  if (GA->getGlobal() == GB->getGlobal())
3615  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3616  DL, VT);
3617  }
3618 
3619  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3620  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3621  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3622  if (TN->getVT() == MVT::i1) {
3623  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3624  DAG.getConstant(1, DL, VT));
3625  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3626  }
3627  }
3628 
3629  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3630  if (N1.getOpcode() == ISD::VSCALE) {
3631  const APInt &IntVal = N1.getConstantOperandAPInt(0);
3632  return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3633  }
3634 
3635  // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3636  if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3637  APInt NewStep = -N1.getConstantOperandAPInt(0);
3638  return DAG.getNode(ISD::ADD, DL, VT, N0,
3639  DAG.getStepVector(DL, VT, NewStep));
3640  }
3641 
3642  // Prefer an add for more folding potential and possibly better codegen:
3643  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3644  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3645  SDValue ShAmt = N1.getOperand(1);
3646  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3647  if (ShAmtC &&
3648  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3649  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3650  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3651  }
3652  }
3653 
3654  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3655  // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3656  if (SDValue Carry = getAsCarry(TLI, N0)) {
3657  SDValue X = N1;
3658  SDValue Zero = DAG.getConstant(0, DL, VT);
3659  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3660  return DAG.getNode(ISD::ADDCARRY, DL,
3661  DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3662  Carry);
3663  }
3664  }
3665 
3666  return SDValue();
3667 }
3668 
3669 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3670  SDValue N0 = N->getOperand(0);
3671  SDValue N1 = N->getOperand(1);
3672  EVT VT = N0.getValueType();
3673  SDLoc DL(N);
3674 
3675  // fold (sub_sat x, undef) -> 0
3676  if (N0.isUndef() || N1.isUndef())
3677  return DAG.getConstant(0, DL, VT);
3678 
3679  // fold (sub_sat x, x) -> 0
3680  if (N0 == N1)
3681  return DAG.getConstant(0, DL, VT);
3682 
3683  // fold (sub_sat c1, c2) -> c3
3684  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3685  return C;
3686 
3687  // fold vector ops
3688  if (VT.isVector()) {
3689  // TODO SimplifyVBinOp
3690 
3691  // fold (sub_sat x, 0) -> x, vector edition
3693  return N0;
3694  }
3695 
3696  // fold (sub_sat x, 0) -> x
3697  if (isNullConstant(N1))
3698  return N0;
3699 
3700  return SDValue();
3701 }
3702 
3703 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3704  SDValue N0 = N->getOperand(0);
3705  SDValue N1 = N->getOperand(1);
3706  EVT VT = N0.getValueType();
3707  SDLoc DL(N);
3708 
3709  // If the flag result is dead, turn this into an SUB.
3710  if (!N->hasAnyUseOfValue(1))
3711  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3713 
3714  // fold (subc x, x) -> 0 + no borrow
3715  if (N0 == N1)
3716  return CombineTo(N, DAG.getConstant(0, DL, VT),
3718 
3719  // fold (subc x, 0) -> x + no borrow
3720  if (isNullConstant(N1))
3721  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3722 
3723  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3724  if (isAllOnesConstant(N0))
3725  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3727 
3728  return SDValue();
3729 }
3730 
3731 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3732  SDValue N0 = N->getOperand(0);
3733  SDValue N1 = N->getOperand(1);
3734  EVT VT = N0.getValueType();
3735  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3736 
3737  EVT CarryVT = N->getValueType(1);
3738  SDLoc DL(N);
3739 
3740  // If the flag result is dead, turn this into an SUB.
3741  if (!N->hasAnyUseOfValue(1))
3742  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3743  DAG.getUNDEF(CarryVT));
3744 
3745  // fold (subo x, x) -> 0 + no borrow
3746  if (N0 == N1)
3747  return CombineTo(N, DAG.getConstant(0, DL, VT),
3748  DAG.getConstant(0, DL, CarryVT));
3749 
3751 
3752  // fold (subox, c) -> (addo x, -c)
3753  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3754  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3755  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3756  }
3757 
3758  // fold (subo x, 0) -> x + no borrow
3759  if (isNullOrNullSplat(N1))
3760  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3761 
3762  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3763  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3764  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3765  DAG.getConstant(0, DL, CarryVT));
3766 
3767  return SDValue();
3768 }
3769 
3770 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3771  SDValue N0 = N->getOperand(0);
3772  SDValue N1 = N->getOperand(1);
3773  SDValue CarryIn = N->getOperand(2);
3774 
3775  // fold (sube x, y, false) -> (subc x, y)
3776  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3777  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3778 
3779  return SDValue();
3780 }
3781 
3782 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3783  SDValue N0 = N->getOperand(0);
3784  SDValue N1 = N->getOperand(1);
3785  SDValue CarryIn = N->getOperand(2);
3786 
3787  // fold (subcarry x, y, false) -> (usubo x, y)
3788  if (isNullConstant(CarryIn)) {
3789  if (!LegalOperations ||
3790  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3791  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3792  }
3793 
3794  return SDValue();
3795 }
3796 
3797 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3798  SDValue N0 = N->getOperand(0);
3799  SDValue N1 = N->getOperand(1);
3800  SDValue CarryIn = N->getOperand(2);
3801 
3802  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3803  if (isNullConstant(CarryIn)) {
3804  if (!LegalOperations ||
3805  TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3806  return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3807  }
3808 
3809  return SDValue();
3810 }
3811 
3812 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3813 // UMULFIXSAT here.
3814 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3815  SDValue N0 = N->getOperand(0);
3816  SDValue N1 = N->getOperand(1);
3817  SDValue Scale = N->getOperand(2);
3818  EVT VT = N0.getValueType();
3819 
3820  // fold (mulfix x, undef, scale) -> 0
3821  if (N0.isUndef() || N1.isUndef())
3822  return DAG.getConstant(0, SDLoc(N), VT);
3823 
3824  // Canonicalize constant to RHS (vector doesn't have to splat)
3827  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3828 
3829  // fold (mulfix x, 0, scale) -> 0
3830  if (isNullConstant(N1))
3831  return DAG.getConstant(0, SDLoc(N), VT);
3832 
3833  return SDValue();
3834 }
3835 
3836 SDValue DAGCombiner::visitMUL(SDNode *N) {
3837  SDValue N0 = N->getOperand(0);
3838  SDValue N1 = N->getOperand(1);
3839  EVT VT = N0.getValueType();
3840 
3841  // fold (mul x, undef) -> 0
3842  if (N0.isUndef() || N1.isUndef())
3843  return DAG.getConstant(0, SDLoc(N), VT);
3844 
3845  // fold (mul c1, c2) -> c1*c2
3846  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3847  return C;
3848 
3849  // canonicalize constant to RHS (vector doesn't have to splat)
3852  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3853 
3854  bool N1IsConst = false;
3855  bool N1IsOpaqueConst = false;
3856  APInt ConstValue1;
3857 
3858  // fold vector ops
3859  if (VT.isVector()) {
3860  if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
3861  return FoldedVOp;
3862 
3863  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3864  assert((!N1IsConst ||
3865  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3866  "Splat APInt should be element width");
3867  } else {
3868  N1IsConst = isa<ConstantSDNode>(N1);
3869  if (N1IsConst) {
3870  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3871  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3872  }
3873  }
3874 
3875  // fold (mul x, 0) -> 0
3876  if (N1IsConst && ConstValue1.isZero())
3877  return N1;
3878 
3879  // fold (mul x, 1) -> x
3880  if (N1IsConst && ConstValue1.isOne())
3881  return N0;
3882 
3883  if (SDValue NewSel = foldBinOpIntoSelect(N))
3884  return NewSel;
3885 
3886  // fold (mul x, -1) -> 0-x
3887  if (N1IsConst && ConstValue1.isAllOnes()) {
3888  SDLoc DL(N);
3889  return DAG.getNode(ISD::SUB, DL, VT,
3890  DAG.getConstant(0, DL, VT), N0);
3891  }
3892 
3893  // fold (mul x, (1 << c)) -> x << c
3894  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3895  DAG.isKnownToBeAPowerOfTwo(N1) &&
3896  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3897  SDLoc DL(N);
3898  SDValue LogBase2 = BuildLogBase2(N1, DL);
3899  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3900  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3901  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3902  }
3903 
3904  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3905  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
3906  unsigned Log2Val = (-ConstValue1).logBase2();
3907  SDLoc DL(N);
3908  // FIXME: If the input is something that is easily negated (e.g. a
3909  // single-use add), we should put the negate there.
3910  return DAG.getNode(ISD::SUB, DL, VT,
3911  DAG.getConstant(0, DL, VT),
3912  DAG.getNode(ISD::SHL, DL, VT, N0,
3913  DAG.getConstant(Log2Val, DL,
3914  getShiftAmountTy(N0.getValueType()))));
3915  }
3916 
3917  // Try to transform:
3918  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3919  // mul x, (2^N + 1) --> add (shl x, N), x
3920  // mul x, (2^N - 1) --> sub (shl x, N), x
3921  // Examples: x * 33 --> (x << 5) + x
3922  // x * 15 --> (x << 4) - x
3923  // x * -33 --> -((x << 5) + x)
3924  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3925  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3926  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3927  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3928  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3929  // x * 0xf800 --> (x << 16) - (x << 11)
3930  // x * -0x8800 --> -((x << 15) + (x << 11))
3931  // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3932  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3933  // TODO: We could handle more general decomposition of any constant by
3934  // having the target set a limit on number of ops and making a
3935  // callback to determine that sequence (similar to sqrt expansion).
3936  unsigned MathOp = ISD::DELETED_NODE;
3937  APInt MulC = ConstValue1.abs();
3938  // The constant `2` should be treated as (2^0 + 1).
3939  unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3940  MulC.lshrInPlace(TZeros);
3941  if ((MulC - 1).isPowerOf2())
3942  MathOp = ISD::ADD;
3943  else if ((MulC + 1).isPowerOf2())
3944  MathOp = ISD::SUB;
3945 
3946  if (MathOp != ISD::DELETED_NODE) {
3947  unsigned ShAmt =
3948  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3949  ShAmt += TZeros;
3950  assert(ShAmt < VT.getScalarSizeInBits() &&
3951  "multiply-by-constant generated out of bounds shift");
3952  SDLoc DL(N);
3953  SDValue Shl =
3954  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3955  SDValue R =
3956  TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3957  DAG.getNode(ISD::SHL, DL, VT, N0,
3958  DAG.getConstant(TZeros, DL, VT)))
3959  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3960  if (ConstValue1.isNegative())
3961  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3962  return R;
3963  }
3964  }
3965 
3966  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3967  if (N0.getOpcode() == ISD::SHL &&
3968  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3969  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3970  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3972  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3973  }
3974 
3975  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3976  // use.
3977  {
3978  SDValue Sh, Y;
3979 
3980  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3981  if (N0.getOpcode() == ISD::SHL &&
3983  N0.getNode()->hasOneUse()) {
3984  Sh = N0; Y = N1;
3985  } else if (N1.getOpcode() == ISD::SHL &&
3987  N1.getNode()->hasOneUse()) {
3988  Sh = N1; Y = N0;
3989  }
3990 
3991  if (Sh.getNode()) {
3992  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3993  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3994  }
3995  }
3996 
3997  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3999  N0.getOpcode() == ISD::ADD &&
4001  isMulAddWithConstProfitable(N, N0, N1))
4002  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
4003  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
4004  N0.getOperand(0), N1),
4005  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
4006  N0.getOperand(1), N1));
4007 
4008  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4009  if (N0.getOpcode() == ISD::VSCALE)
4010  if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
4011  const APInt &C0 = N0.getConstantOperandAPInt(0);
4012  const APInt &C1 = NC1->getAPIntValue();
4013  return DAG.getVScale(SDLoc(N), VT, C0 * C1);
4014  }
4015 
4016  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4017  APInt MulVal;
4018  if (N0.getOpcode() == ISD::STEP_VECTOR)
4019  if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4020  const APInt &C0 = N0.getConstantOperandAPInt(0);
4021  APInt NewStep = C0 * MulVal;
4022  return DAG.getStepVector(SDLoc(N), VT, NewStep);
4023  }
4024 
4025  // Fold ((mul x, 0/undef) -> 0,
4026  // (mul x, 1) -> x) -> x)
4027  // -> and(x, mask)
4028  // We can replace vectors with '0' and '1' factors with a clearing mask.
4029  if (VT.isFixedLengthVector()) {
4030  unsigned NumElts = VT.getVectorNumElements();
4031  SmallBitVector ClearMask;
4032  ClearMask.reserve(NumElts);
4033  auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4034  if (!V || V->isZero()) {
4035  ClearMask.push_back(true);
4036  return true;
4037  }
4038  ClearMask.push_back(false);
4039  return V->isOne();
4040  };
4041  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4042  ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4043  assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4044  SDLoc DL(N);
4045  EVT LegalSVT = N1.getOperand(0).getValueType();
4046  SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4047  SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4048  SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4049  for (unsigned I = 0; I != NumElts; ++I)
4050  if (ClearMask[I])
4051  Mask[I] = Zero;
4052  return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4053  }
4054  }
4055 
4056  // reassociate mul
4057  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
4058  return RMUL;
4059 
4060  return SDValue();
4061 }
4062 
4063 /// Return true if divmod libcall is available.
4064 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4065  const TargetLowering &TLI) {
4066  RTLIB::Libcall LC;
4067  EVT NodeType = Node->getValueType(0);
4068  if (!NodeType.isSimple())
4069  return false;
4070  switch (NodeType.getSimpleVT().SimpleTy) {
4071  default: return false; // No libcall for vector types.
4072  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4073  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4074  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4075  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4076  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4077  }
4078 
4079  return TLI.getLibcallName(LC) != nullptr;
4080 }
4081 
4082 /// Issue divrem if both quotient and remainder are needed.
/// Issue divrem if both quotient and remainder are needed.
/// Looks through the other users of this node's dividend for a matching
/// div/rem/divrem with the same operands; when found, all matches (and this
/// node, via the returned value) are rewritten to share one DIVREM node.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  // Note: CombineTo below mutates the DAG while we iterate the use list;
  // deleted users are skipped via the DELETED_NODE / use_empty checks above
  // each candidate.
  for (SDNode *User : Op0.getNode()->uses()) {
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // First match decides which DIVREM node everyone shares: reuse an
        // existing DIVREM, create one for the complementary op, or keep
        // scanning if the match is just another copy of this same opcode.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Result 0 of DIVREM is the quotient, result 1 the remainder.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
4150 
4152  SDValue N0 = N->getOperand(0);
4153  SDValue N1 = N->getOperand(1);
4154  EVT VT = N->getValueType(0);
4155  SDLoc DL(N);
4156 
4157  unsigned Opc = N->getOpcode();
4158  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4160 
4161  // X / undef -> undef
4162  // X % undef -> undef
4163  // X / 0 -> undef
4164  // X % 0 -> undef
4165  // NOTE: This includes vectors where any divisor element is zero/undef.
4166  if (DAG.isUndef(Opc, {N0, N1}))
4167  return DAG.getUNDEF(VT);
4168 
4169  // undef / X -> 0
4170  // undef % X -> 0
4171  if (N0.isUndef())
4172  return DAG.getConstant(0, DL, VT);
4173 
4174  // 0 / X -> 0
4175  // 0 % X -> 0
4177  if (N0C && N0C->isZero())
4178  return N0;
4179 
4180  // X / X -> 1
4181  // X % X -> 0
4182  if (N0 == N1)
4183  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4184 
4185  // X / 1 -> X
4186  // X % 1 -> 0
4187  // If this is a boolean op (single-bit element type), we can't have
4188  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4189  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4190  // it's a 1.
4191  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4192  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4193 
4194  return SDValue();
4195 }
4196 
4197 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4198  SDValue N0 = N->getOperand(0);
4199  SDValue N1 = N->getOperand(1);
4200  EVT VT = N->getValueType(0);
4201  EVT CCVT = getSetCCResultType(VT);
4202  SDLoc DL(N);
4203 
4204  // fold (sdiv c1, c2) -> c1/c2
4205  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4206  return C;
4207 
4208  // fold vector ops
4209  if (VT.isVector())
4210  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4211  return FoldedVOp;
4212 
4213  // fold (sdiv X, -1) -> 0-X
4215  if (N1C && N1C->isAllOnes())
4216  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4217 
4218  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4219  if (N1C && N1C->getAPIntValue().isMinSignedValue())
4220  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4221  DAG.getConstant(1, DL, VT),
4222  DAG.getConstant(0, DL, VT));
4223 
4224  if (SDValue V = simplifyDivRem(N, DAG))
4225  return V;
4226 
4227  if (SDValue NewSel = foldBinOpIntoSelect(N))
4228  return NewSel;
4229 
4230  // If we know the sign bits of both operands are zero, strength reduce to a
4231  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4232  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4233  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4234 
4235  if (SDValue V = visitSDIVLike(N0, N1, N)) {
4236  // If the corresponding remainder node exists, update its users with
4237  // (Dividend - (Quotient * Divisor).
4238  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4239  { N0, N1 })) {
4240  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4241  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4242  AddToWorklist(Mul.getNode());
4243  AddToWorklist(Sub.getNode());
4244  CombineTo(RemNode, Sub);
4245  }
4246  return V;
4247  }
4248 
4249  // sdiv, srem -> sdivrem
4250  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4251  // true. Otherwise, we break the simplification logic in visitREM().
4253  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4254  if (SDValue DivRem = useDivRem(N))
4255  return DivRem;
4256 
4257  return SDValue();
4258 }
4259 
// Shared sdiv-by-constant expansion used by visitSDIV and by visitREM (which
// lowers X%C via X - (X/C)*C). Tries, in order: a target hook for
// sdiv-by-pow2, the generic shift/add pow2 expansion, and BuildSDIV's
// multiply-by-magic-constant path. Returns a null SDValue if nothing applies.
4260 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4261  SDLoc DL(N);
4262  EVT VT = N->getValueType(0);
4263  EVT CCVT = getSetCCResultType(VT);
4264  unsigned BitWidth = VT.getScalarSizeInBits();
4265 
4266  // Helper for determining whether a value is a power-2 constant scalar or a
4267  // vector of such elements.
4268  auto IsPowerOfTwo = [](ConstantSDNode *C) {
  // Zero and opaque constants are rejected; both +2^k and -2^k are accepted.
4269  if (C->isZero() || C->isOpaque())
4270  return false;
4271  if (C->getAPIntValue().isPowerOf2())
4272  return true;
4273  if (C->getAPIntValue().isNegatedPowerOf2())
4274  return true;
4275  return false;
4276  };
4277 
4278  // fold (sdiv X, pow2) -> simple ops after legalize
4279  // FIXME: We check for the exact bit here because the generic lowering gives
4280  // better results in that case. The target-specific lowering should learn how
4281  // to handle exact sdivs efficiently.
4282  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4283  // Target-specific implementation of sdiv x, pow2.
4284  if (SDValue Res = BuildSDIVPow2(N))
4285  return Res;
4286 
4287  // Create constants that are functions of the shift amount value.
4288  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4289  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4290  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4291  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4292  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
  // CTTZ of a constant (vector) should have folded to a constant; if it did
  // not, give up rather than emit non-constant shift amounts.
4293  if (!isConstantOrConstantVector(Inexact))
4294  return SDValue();
4295 
4296  // Splat the sign bit into the register
4297  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4298  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4299  AddToWorklist(Sign.getNode());
4300 
4301  // Add (N0 < 0) ? abs2 - 1 : 0;
4302  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4303  AddToWorklist(Srl.getNode());
4304  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4305  AddToWorklist(Add.getNode());
4306  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4307  AddToWorklist(Sra.getNode());
4308 
4309  // Special case: (sdiv X, 1) -> X
4310  // Special Case: (sdiv X, -1) -> 0-X
4311  SDValue One = DAG.getConstant(1, DL, VT);
4312  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4313  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4314  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4315  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4316  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4317 
4318  // If dividing by a positive value, we're done. Otherwise, the result must
4319  // be negated.
4320  SDValue Zero = DAG.getConstant(0, DL, VT);
4321  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4322 
4323  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4324  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4325  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4326  return Res;
4327  }
4328 
4329  // If integer divide is expensive and we satisfy the requirements, emit an
4330  // alternate sequence. Targets may check function attributes for size/speed
4331  // trade-offs.
  // NOTE(review): the declaration of 'Attr' (presumably the caller function's
  // attribute list used by isIntDivCheap) is not visible in this extract —
  // confirm against the full file.
4333  if (isConstantOrConstantVector(N1) &&
4334  !TLI.isIntDivCheap(N->getValueType(0), Attr))
4335  if (SDValue Op = BuildSDIV(N))
4336  return Op;
4337 
4338  return SDValue();
4339 }
4340 
// Combine an ISD::UDIV node: constant-fold, simplify, strength-reduce via
// visitUDIVLike, and opportunistically fuse with a matching UREM into UDIVREM.
4341 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4342  SDValue N0 = N->getOperand(0);
4343  SDValue N1 = N->getOperand(1);
4344  EVT VT = N->getValueType(0);
4345  EVT CCVT = getSetCCResultType(VT);
4346  SDLoc DL(N);
4347 
4348  // fold (udiv c1, c2) -> c1/c2
4349  if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4350  return C;
4351 
4352  // fold vector ops
4353  if (VT.isVector())
4354  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4355  return FoldedVOp;
4356 
4357  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  // NOTE(review): the declaration of N1C (constant/splat view of N1) is not
  // visible in this extract — confirm against the full file.
4359  if (N1C && N1C->isAllOnes())
4360  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4361  DAG.getConstant(1, DL, VT),
4362  DAG.getConstant(0, DL, VT));
4363 
4364  if (SDValue V = simplifyDivRem(N, DAG))
4365  return V;
4366 
4367  if (SDValue NewSel = foldBinOpIntoSelect(N))
4368  return NewSel;
4369 
4370  if (SDValue V = visitUDIVLike(N0, N1, N)) {
4371  // If the corresponding remainder node exists, update its users with
4372  // (Dividend - (Quotient * Divisor)).
4373  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4374  { N0, N1 })) {
4375  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4376  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4377  AddToWorklist(Mul.getNode());
4378  AddToWorklist(Sub.getNode());
4379  CombineTo(RemNode, Sub);
4380  }
4381  return V;
4382  }
4383 
4384  // udiv, urem -> udivrem
4385  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4386  // true. Otherwise, we break the simplification logic in visitREM().
  // NOTE(review): the declaration of 'Attr' (function attribute list) is not
  // visible in this extract — confirm against the full file.
4388  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4389  if (SDValue DivRem = useDivRem(N))
4390  return DivRem;
4391 
4392  return SDValue();
4393 }
4394 
// Shared udiv-by-constant expansion used by visitUDIV and by visitREM.
// Strength-reduces division by (possibly shifted) powers of two to SRL, and
// otherwise falls back to BuildUDIV's multiply-by-magic-constant path.
4395 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4396  SDLoc DL(N);
4397  EVT VT = N->getValueType(0);
4398 
4399  // fold (udiv x, (1 << c)) -> x >>u c
4400  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4401  DAG.isKnownToBeAPowerOfTwo(N1)) {
4402  SDValue LogBase2 = BuildLogBase2(N1, DL);
4403  AddToWorklist(LogBase2.getNode());
4404 
4405  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4406  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4407  AddToWorklist(Trunc.getNode());
4408  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4409  }
4410 
4411  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4412  if (N1.getOpcode() == ISD::SHL) {
4413  SDValue N10 = N1.getOperand(0);
4414  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4415  DAG.isKnownToBeAPowerOfTwo(N10)) {
4416  SDValue LogBase2 = BuildLogBase2(N10, DL);
4417  AddToWorklist(LogBase2.getNode());
4418 
  // The combined shift amount is built in the SHL amount's type.
4419  EVT ADDVT = N1.getOperand(1).getValueType();
4420  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4421  AddToWorklist(Trunc.getNode());
4422  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4423  AddToWorklist(Add.getNode());
4424  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4425  }
4426  }
4427 
4428  // fold (udiv x, c) -> alternate
  // NOTE(review): the declaration of 'Attr' (function attribute list used by
  // isIntDivCheap) is not visible in this extract — confirm against the full
  // file.
4430  if (isConstantOrConstantVector(N1) &&
4431  !TLI.isIntDivCheap(N->getValueType(0), Attr))
4432  if (SDValue Op = BuildUDIV(N))
4433  return Op;
4434 
4435  return SDValue();
4436 }
4437 
4438 // handles ISD::SREM and ISD::UREM
// Combine a remainder node: constant-fold, strength-reduce (srem->urem when
// sign bits are zero; urem-by-pow2 to AND), lower X%C via the speculative
// div-by-constant combines, or fuse with a matching DIV into DIVREM.
4439 SDValue DAGCombiner::visitREM(SDNode *N) {
4440  unsigned Opcode = N->getOpcode();
4441  SDValue N0 = N->getOperand(0);
4442  SDValue N1 = N->getOperand(1);
4443  EVT VT = N->getValueType(0);
4444  EVT CCVT = getSetCCResultType(VT);
4445 
4446  bool isSigned = (Opcode == ISD::SREM);
4447  SDLoc DL(N);
4448 
4449  // fold (rem c1, c2) -> c1%c2
  // NOTE(review): the declaration of N1C (constant/splat view of N1) is not
  // visible in this extract — confirm against the full file.
4451  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4452  return C;
4453 
4454  // fold (urem X, -1) -> select(X == -1, 0, x)
4455  if (!isSigned && N1C && N1C->isAllOnes())
4456  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4457  DAG.getConstant(0, DL, VT), N0);
4458 
4459  if (SDValue V = simplifyDivRem(N, DAG))
4460  return V;
4461 
4462  if (SDValue NewSel = foldBinOpIntoSelect(N))
4463  return NewSel;
4464 
4465  if (isSigned) {
4466  // If we know the sign bits of both operands are zero, strength reduce to a
4467  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4468  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4469  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4470  } else {
4471  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4472  // fold (urem x, pow2) -> (and x, pow2-1)
4473  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4474  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4475  AddToWorklist(Add.getNode());
4476  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4477  }
4478  if (N1.getOpcode() == ISD::SHL &&
4479  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4480  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4481  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4482  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4483  AddToWorklist(Add.getNode());
4484  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4485  }
4486  }
4487 
  // NOTE(review): the declaration of 'Attr' (function attribute list used by
  // isIntDivCheap) is not visible in this extract — confirm against the full
  // file.
4489 
4490  // If X/C can be simplified by the division-by-constant logic, lower
4491  // X%C to the equivalent of X-X/C*C.
4492  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4493  // speculative DIV must not cause a DIVREM conversion. We guard against this
4494  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4495  // combine will not return a DIVREM. Regardless, checking cheapness here
4496  // makes sense since the simplification results in fatter code.
4497  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4498  SDValue OptimizedDiv =
4499  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4500  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4501  // If the equivalent Div node also exists, update its users.
4502  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4503  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4504  { N0, N1 }))
4505  CombineTo(DivNode, OptimizedDiv);
4506  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4507  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4508  AddToWorklist(OptimizedDiv.getNode());
4509  AddToWorklist(Mul.getNode());
4510  return Sub;
4511  }
4512  }
4513 
4514  // srem/urem -> sdivrem/udivrem; the remainder is result value 1.
4515  if (SDValue DivRem = useDivRem(N))
4516  return DivRem.getValue(1);
4517 
4518  return SDValue();
4519 }
4520 
// Combine an ISD::MULHS (signed multiply-high) node: constant-fold, handle
// 0/1/undef operands, and widen to a full MUL + SRL when the double-width
// multiply is legal.
4521 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4522  SDValue N0 = N->getOperand(0);
4523  SDValue N1 = N->getOperand(1);
4524  EVT VT = N->getValueType(0);
4525  SDLoc DL(N);
4526 
4527  // fold (mulhs c1, c2)
4528  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4529  return C;
4530 
4531  // canonicalize constant to RHS.
  // NOTE(review): the guard condition for this canonicalization (constant on
  // LHS but not RHS) is missing from this extract; as written the return
  // below would be unconditional. Confirm against the full file.
4534  return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4535 
4536  if (VT.isVector()) {
4537  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4538  return FoldedVOp;
4539 
4540  // fold (mulhs x, 0) -> 0
4541  // do not return N1, because undef node may exist.
  // NOTE(review): the guard condition (all-zeros splat check of N1) is
  // missing from this extract. Confirm against the full file.
4543  return DAG.getConstant(0, DL, VT);
4544  }
4545 
4546  // fold (mulhs x, 0) -> 0
4547  if (isNullConstant(N1))
4548  return N1;
4549 
4550  // fold (mulhs x, 1) -> (sra x, size(x)-1)
4551  if (isOneConstant(N1))
4552  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4553  DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
  // NOTE(review): the continuation of this call (the shift-amount type
  // argument and closing parens) is missing from this extract.
4555 
4556  // fold (mulhs x, undef) -> 0
4557  if (N0.isUndef() || N1.isUndef())
4558  return DAG.getConstant(0, DL, VT);
4559 
4560  // If the type twice as wide is legal, transform the mulhs to a wider multiply
4561  // plus a shift.
4562  if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4563  !VT.isVector()) {
4564  MVT Simple = VT.getSimpleVT();
4565  unsigned SimpleSize = Simple.getSizeInBits();
4566  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4567  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4568  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4569  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4570  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4571  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4572  DAG.getConstant(SimpleSize, DL,
  // NOTE(review): the continuation of this call (the shift-amount type
  // argument and closing parens) is missing from this extract.
4574  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4575  }
4576  }
4577 
4578  return SDValue();
4579 }
4580 
// Combine an ISD::MULHU (unsigned multiply-high) node: constant-fold, handle
// 0/1/undef operands, strength-reduce mulhu-by-pow2 to SRL, and widen to a
// full MUL + SRL when the double-width multiply is legal.
4581 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4582  SDValue N0 = N->getOperand(0);
4583  SDValue N1 = N->getOperand(1);
4584  EVT VT = N->getValueType(0);
4585  SDLoc DL(N);
4586 
4587  // fold (mulhu c1, c2)
4588  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4589  return C;
4590 
4591  // canonicalize constant to RHS.
  // NOTE(review): the guard condition for this canonicalization (constant on
  // LHS but not RHS) is missing from this extract; as written the return
  // below would be unconditional. Confirm against the full file.
4594  return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4595 
4596  if (VT.isVector()) {
4597  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4598  return FoldedVOp;
4599 
4600  // fold (mulhu x, 0) -> 0
4601  // do not return N1, because undef node may exist.
  // NOTE(review): the guard condition (all-zeros splat check of N1) is
  // missing from this extract. Confirm against the full file.
4603  return DAG.getConstant(0, DL, VT);
4604  }
4605 
4606  // fold (mulhu x, 0) -> 0
4607  if (isNullConstant(N1))
4608  return N1;
4609 
4610  // fold (mulhu x, 1) -> 0
4611  if (isOneConstant(N1))
4612  return DAG.getConstant(0, DL, N0.getValueType());
4613 
4614  // fold (mulhu x, undef) -> 0
4615  if (N0.isUndef() || N1.isUndef())
4616  return DAG.getConstant(0, DL, VT);
4617 
4618  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4619  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4620  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4621  unsigned NumEltBits = VT.getScalarSizeInBits();
4622  SDValue LogBase2 = BuildLogBase2(N1, DL);
4623  SDValue SRLAmt = DAG.getNode(
4624  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4625  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4626  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4627  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4628  }
4629 
4630  // If the type twice as wide is legal, transform the mulhu to a wider multiply
4631  // plus a shift.
4632  if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4633  !VT.isVector()) {
4634  MVT Simple = VT.getSimpleVT();
4635  unsigned SimpleSize = Simple.getSizeInBits();
4636  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4637  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4638  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4639  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4640  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4641  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4642  DAG.getConstant(SimpleSize, DL,
  // NOTE(review): the continuation of this call (the shift-amount type
  // argument and closing parens) is missing from this extract.
4644  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4645  }
4646  }
4647 
4648  // Simplify the operands using demanded-bits information.
4649  // We don't have demanded bits support for MULHU so this just enables constant
4650  // folding based on known bits.
4651  if (SimplifyDemandedBits(SDValue(N, 0)))
4652  return SDValue(N, 0);
4653 
4654  return SDValue();
4655 }
4656 
4657 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4658 /// give the opcodes for the two computations that are being performed. Return
4659 /// true if a simplification was made.
4660 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4661  unsigned HiOp) {
4662  // If the high half is not needed, just compute the low half.
4663  bool HiExists = N->hasAnyUseOfValue(1);
4664  if (!HiExists && (!LegalOperations ||
4665  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4666  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4667  return CombineTo(N, Res, Res);
4668  }
4669 
4670  // If the low half is not needed, just compute the high half.
4671  bool LoExists = N->hasAnyUseOfValue(0);
4672  if (!LoExists && (!LegalOperations ||
4673  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4674  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4675  return CombineTo(N, Res, Res);
4676  }
4677 
4678  // If both halves are used, return as it is.
4679  if (LoExists && HiExists)
4680  return SDValue();
4681 
4682  // If the two computed results can be simplified separately, separate them.
4683  if (LoExists) {
4684  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4685  AddToWorklist(Lo.getNode());
4686  SDValue LoOpt = combine(Lo.getNode());
4687  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4688  (!LegalOperations ||
4689  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4690  return CombineTo(N, LoOpt, LoOpt);
4691  }
4692 
4693  if (HiExists) {
4694  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4695  AddToWorklist(Hi.getNode());
4696  SDValue HiOpt = combine(Hi.getNode());
4697  if (HiOpt.getNode() && HiOpt != Hi &&
4698  (!LegalOperations ||
4699  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4700  return CombineTo(N, HiOpt, HiOpt);
4701  }
4702 
4703  return SDValue();
4704 }
4705 
// Combine an ISD::SMUL_LOHI node: drop an unused half via
// SimplifyNodeWithTwoResults, or expand to a sign-extended double-width MUL
// split into low/high halves when the wide multiply is legal.
4706 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4707  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4708  return Res;
4709 
4710  EVT VT = N->getValueType(0);
4711  SDLoc DL(N);
4712 
4713  // If the type twice as wide is legal, transform the smul_lohi to a wider
4714  // multiply plus a shift.
4715  if (VT.isSimple() && !VT.isVector()) {
4716  MVT Simple = VT.getSimpleVT();
4717  unsigned SimpleSize = Simple.getSizeInBits();
4718  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4719  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4720  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4721  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4722  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4723  // Compute the high part by shifting the wide product down.
4724  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4725  DAG.getConstant(SimpleSize, DL,
4726  getShiftAmountTy(Lo.getValueType())));
4727  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4728  // Compute the low part by truncating the wide product.
4729  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4730  return CombineTo(N, Lo, Hi);
4731  }
4732  }
4733 
4734  return SDValue();
4735 }
4736 
4737 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4738  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4739  return Res;
4740 
4741  EVT VT = N->getValueType(0);
4742  SDLoc DL(N);
4743 
4744  // (umul_lohi N0, 0) -> (0, 0)
4745  if (isNullConstant(N->getOperand(1))) {
4746  SDValue Zero = DAG.getConstant(0, DL, VT);
4747  return CombineTo(N, Zero, Zero);
4748  }
4749 
4750  // (umul_lohi N0, 1) -> (N0, 0)
4751  if (isOneConstant(N->getOperand(1))) {
4752  SDValue Zero = DAG.getConstant(0, DL, VT);
4753  return CombineTo(N, N->getOperand(0), Zero);
4754  }
4755 
4756  // If the type is twice as wide is legal, transform the mulhu to a wider
4757  // multiply plus a shift.
4758  if (VT.isSimple() && !VT.isVector()) {
4759  MVT Simple = VT.getSimpleVT();
4760  unsigned SimpleSize = Simple.getSizeInBits();
4761  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4762  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4763  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4764  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4765  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4766  // Compute the high part as N1.
4767  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4768  DAG.getConstant(SimpleSize, DL,
4769  getShiftAmountTy(Lo.getValueType())));
4770  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4771  // Compute the low part as N0.
4772  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4773  return CombineTo(N, Lo, Hi);
4774  }
4775  }
4776 
4777  return SDValue();
4778 }
4779 
// Combine ISD::SMULO/ISD::UMULO (multiply-with-overflow) nodes:
// constant-fold, handle 0/2 multipliers, and replace with a plain MUL plus a
// false carry when value tracking proves overflow is impossible.
4780 SDValue DAGCombiner::visitMULO(SDNode *N) {
4781  SDValue N0 = N->getOperand(0);
4782  SDValue N1 = N->getOperand(1);
4783  EVT VT = N0.getValueType();
4784  bool IsSigned = (ISD::SMULO == N->getOpcode());
4785 
4786  EVT CarryVT = N->getValueType(1);
4787  SDLoc DL(N);
4788 
  // NOTE(review): the declarations of N0C/N1C (constant/splat views of
  // N0/N1) are not visible in this extract — confirm against the full file.
4791 
4792  // fold operation with constant operands.
4793  // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4794  // multiple results.
4795  if (N0C && N1C) {
4796  bool Overflow;
4797  APInt Result =
4798  IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4799  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4800  return CombineTo(N, DAG.getConstant(Result, DL, VT),
4801  DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4802  }
4803 
4804  // canonicalize constant to RHS.
  // NOTE(review): the guard condition for this canonicalization (constant on
  // LHS but not RHS) is missing from this extract; as written the return
  // below would be unconditional. Confirm against the full file.
4807  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4808 
4809  // fold (mulo x, 0) -> 0 + no carry out
4810  if (isNullOrNullSplat(N1))
4811  return CombineTo(N, DAG.getConstant(0, DL, VT),
4812  DAG.getConstant(0, DL, CarryVT));
4813 
4814  // (mulo x, 2) -> (addo x, x)
4815  if (N1C && N1C->getAPIntValue() == 2)
4816  return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4817  N->getVTList(), N0, N0);
4818 
4819  if (IsSigned) {
4820  // A 1 bit SMULO overflows if both inputs are 1.
4821  if (VT.getScalarSizeInBits() == 1) {
4822  SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4823  return CombineTo(N, And,
4824  DAG.getSetCC(DL, CarryVT, And,
4825  DAG.getConstant(0, DL, VT), ISD::SETNE));
4826  }
4827 
4828  // Multiplying n * m significant bits yields a result of n + m significant
4829  // bits. If the total number of significant bits does not exceed the
4830  // result bit width (minus 1), there is no overflow.
4831  unsigned SignBits = DAG.ComputeNumSignBits(N0);
4832  if (SignBits > 1)
4833  SignBits += DAG.ComputeNumSignBits(N1);
4834  if (SignBits > VT.getScalarSizeInBits() + 1)
4835  return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4836  DAG.getConstant(0, DL, CarryVT));
4837  } else {
  // Unsigned case: if the product of the operands' maximum known values
  // cannot overflow, the multiply never overflows.
4838  KnownBits N1Known = DAG.computeKnownBits(N1);
4839  KnownBits N0Known = DAG.computeKnownBits(N0);
4840  bool Overflow;
4841  (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4842  if (!Overflow)
4843  return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4844  DAG.getConstant(0, DL, CarryVT));
4845  }
4846 
4847  return SDValue();
4848 }
4849 
4850 // Function to calculate whether the Min/Max pair of SDNodes (potentially
4851 // swapped around) make a signed saturate pattern, clamping to between a signed
4852 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
4853 // Returns the node being clamped and the bitwidth of the clamp in BW. Should
4854 // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
4855 // same as SimplifySelectCC. N0<N1 ? N2 : N3.
  // NOTE(review): the first line of this function's signature (name and
  // leading SDValue parameters) is missing from this extract — confirm
  // against the full file.
4857  SDValue N3, ISD::CondCode CC, unsigned &BW,
4858  bool &Unsigned) {
  // Classifies one (N0,N1,N2,N3,CC) tuple as an SMIN/SMAX-equivalent select,
  // returning the min/max opcode, or 0 when it doesn't match.
4859  auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
4860  ISD::CondCode CC) {
4861  // The compare and select operand should be the same or the select operands
4862  // should be truncated versions of the comparison.
4863  if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
4864  return 0;
4865  // The constants need to be the same or a truncated version of each other.
  // NOTE(review): the declarations of N1C/N3C (constant/splat views of
  // N1/N3) are not visible in this extract — confirm against the full file.
4868  if (!N1C || !N3C)
4869  return 0;
4870  const APInt &C1 = N1C->getAPIntValue();
4871  const APInt &C2 = N3C->getAPIntValue();
4872  if (C1.getBitWidth() < C2.getBitWidth() ||
4873  C1 != C2.sextOrSelf(C1.getBitWidth()))
4874  return 0;
4875  return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
4876  };
4877 
4878  // Check the initial value is a SMIN/SMAX equivalent.
4879  unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
4880  if (!Opcode0)
4881  return SDValue();
4882 
  // Decompose the inner node (the value being min/max'd) into its own
  // compare/select operands so it can be classified the same way.
4883  SDValue N00, N01, N02, N03;
4884  ISD::CondCode N0CC;
4885  switch (N0.getOpcode()) {
4886  case ISD::SMIN:
4887  case ISD::SMAX:
4888  N00 = N02 = N0.getOperand(0);
4889  N01 = N03 = N0.getOperand(1);
4890  N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
4891  break;
4892  case ISD::SELECT_CC:
4893  N00 = N0.getOperand(0);
4894  N01 = N0.getOperand(1);
4895  N02 = N0.getOperand(2);
4896  N03 = N0.getOperand(3);
4897  N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
4898  break;
4899  case ISD::SELECT:
4900  case ISD::VSELECT:
4901  if (N0.getOperand(0).getOpcode() != ISD::SETCC)
4902  return SDValue();
4903  N00 = N0.getOperand(0).getOperand(0);
4904  N01 = N0.getOperand(0).getOperand(1);
4905  N02 = N0.getOperand(1);
4906  N03 = N0.getOperand(2);
4907  N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
4908  break;
4909  default:
4910  return SDValue();
4911  }
4912 
  // A clamp needs one SMIN and one SMAX (in either order).
4913  unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
4914  if (!Opcode1 || Opcode0 == Opcode1)
4915  return SDValue();
4916 
4917  ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
4918  ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
4919  if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
4920  return SDValue();
4921 
4922  const APInt &MinC = MinCOp->getAPIntValue();
4923  const APInt &MaxC = MaxCOp->getAPIntValue();
4924  APInt MinCPlus1 = MinC + 1;
  // Signed saturate: bounds are -2^(BW-1) and 2^(BW-1)-1.
4925  if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
4926  BW = MinCPlus1.exactLogBase2() + 1;
4927  Unsigned = false;
4928  return N02;
4929  }
4930 
  // Unsigned saturate: bounds are 0 and 2^BW - 1.
4931  if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
4932  BW = MinCPlus1.exactLogBase2();
4933  Unsigned = true;
4934  return N02;
4935  }
4936 
4937  return SDValue();
4938 }
4939 
  // Fold a saturating min/max clamp of an FP_TO_SINT result into
  // FP_TO_SINT_SAT / FP_TO_UINT_SAT when the target says the saturating
  // conversion is worthwhile.
  // NOTE(review): the first line of this function's signature (name and
  // leading SDValue parameters) is missing from this extract — confirm
  // against the full file.
4941  SDValue N3, ISD::CondCode CC,
4942  SelectionDAG &DAG) {
4943  unsigned BW;
4944  bool Unsigned;
  // Recognize the clamp and recover the clamped value and clamp bitwidth.
4945  SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
4946  if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
4947  return SDValue();
4948  EVT FPVT = Fp.getOperand(0).getValueType();
4949  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
4950  if (FPVT.isVector())
4951  NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
4952  FPVT.getVectorElementCount());
4953  unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
4954  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
4955  return SDValue();
4956  SDLoc DL(Fp);
4957  SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
4958  DAG.getValueType(NewVT.getScalarType()));
  // Extend/truncate the BW-bit saturated value back to the select's type.
4959  return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
4960  : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
4961 }
4962 
  // Fold UMIN(FPTOUI(X), 2^n - 1) — possibly expressed as a
  // select/vselect/select_cc — into FP_TO_UINT_SAT when the target says the
  // saturating conversion is worthwhile.
  // NOTE(review): the first line of this function's signature (name and
  // leading SDValue parameters) is missing from this extract — confirm
  // against the full file.
4964  SDValue N3, ISD::CondCode CC,
4965  SelectionDAG &DAG) {
4966  // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
4967  // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
4968  // be truncated versions of the setcc (N0/N1).
4969  if ((N0 != N2 &&
4970  (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
4971  N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
4972  return SDValue();
  // NOTE(review): the declarations of N1C/N3C (constant/splat views of
  // N1/N3) are not visible in this extract — confirm against the full file.
4975  if (!N1C || !N3C)
4976  return SDValue();
4977  const APInt &C1 = N1C->getAPIntValue();
4978  const APInt &C3 = N3C->getAPIntValue();
  // The bound must be 2^n - 1 and both constants must agree (modulo zext).
4979  if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
4980  C1 != C3.zextOrSelf(C1.getBitWidth()))
4981  return SDValue();
4982 
4983  unsigned BW = (C1 + 1).exactLogBase2();
4984  EVT FPVT = N0.getOperand(0).getValueType();
4985  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
4986  if (FPVT.isVector())
4987  NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
4988  FPVT.getVectorElementCount());
  // NOTE(review): the opening of this condition (presumably a negated
  // shouldConvertFpToSat(ISD::FP_TO_UINT_SAT, ...) call) is missing from
  // this extract — confirm against the full file.
4990  FPVT, NewVT))
4991  return SDValue();
4992 
4993  SDValue Sat =
4994  DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
4995  DAG.getValueType(NewVT.getScalarType()));
4996  return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
4997 }
4998 
// Combine integer min/max nodes (SMIN/SMAX/UMIN/UMAX): constant-fold, flip
// signedness when sign bits are known zero and the flipped op is legal, and
// try the FP-to-saturating-int-conversion folds.
4999 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5000  SDValue N0 = N->getOperand(0);
5001  SDValue N1 = N->getOperand(1);
5002  EVT VT = N0.getValueType();
5003  unsigned Opcode = N->getOpcode();
5004  SDLoc DL(N);
5005 
5006  // fold operation with constant operands.
5007  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5008  return C;
5009 
5010  // canonicalize constant to RHS
  // NOTE(review): the guard condition for this canonicalization (constant on
  // LHS but not RHS) is missing from this extract; as written the return
  // below would be unconditional. Confirm against the full file.
5013  return DAG.getNode(Opcode, DL, VT, N1, N0);
5014 
5015  // fold vector ops
5016  if (VT.isVector())
5017  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5018  return FoldedVOp;
5019 
5020  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5021  // Only do this if the current op isn't legal and the flipped is.
5022  if (!TLI.isOperationLegal(Opcode, VT) &&
5023  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5024  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5025  unsigned AltOpcode;
5026  switch (Opcode) {
5027  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5028  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5029  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5030  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5031  default: llvm_unreachable("Unknown MINMAX opcode");
5032  }
5033  if (TLI.isOperationLegal(AltOpcode, VT))
5034  return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5035  }
5036 
5037  if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
  // NOTE(review): the opening of this condition (presumably
  // 'if (SDValue S = PerformMinMaxFpToSatCombine(') is missing from this
  // extract — confirm against the full file.
5039  N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5040  return S;
5041  if (Opcode == ISD::UMIN)
5042  if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5043  return S;
5044 
5045  // Simplify the operands using demanded-bits information.
5046  if (SimplifyDemandedBits(SDValue(N, 0)))
5047  return SDValue(N, 0);
5048 
5049  return SDValue();
5050 }
5051 
5052 /// If this is a bitwise logic instruction and both operands have the same
5053 /// opcode, try to sink the other opcode after the logic instruction.
5054 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5055  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5056  EVT VT = N0.getValueType();
5057  unsigned LogicOpcode = N->getOpcode();
5058  unsigned HandOpcode = N0.getOpcode();
5059  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
5060  LogicOpcode == ISD::XOR) && "Expected logic opcode");
5061  assert(HandOpcode == N1.getOpcode() && "Bad input!");
5062 
5063  // Bail early if none of these transforms apply.
5064  if (N0.getNumOperands() == 0)
5065  return SDValue();
5066 
5067  // FIXME: We should check number of uses of the operands to not increase
5068  // the instruction count for all transforms.
5069 
5070  // Handle size-changing casts.
5071  SDValue X = N0.getOperand(0);
5072  SDValue Y = N1.getOperand(0);
5073  EVT XVT = X.getValueType();
5074  SDLoc DL(N);
5075  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
5076  HandOpcode == ISD::SIGN_EXTEND) {
5077  // If both operands have other uses, this transform would create extra
5078  // instructions without eliminating anything.
5079  if (!N0.hasOneUse() && !N1.hasOneUse())
5080  return SDValue();
5081  // We need matching integer source types.
5082  if (XVT != Y.getValueType())
5083  return SDValue();
5084  // Don't create an illegal op during or after legalization. Don't ever
5085  // create an unsupported vector op.
5086  if ((VT.isVector() || LegalOperations) &&
5087  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5088  return SDValue();
5089  // Avoid infinite looping with PromoteIntBinOp.
5090  // TODO: Should we apply desirable/legal constraints to all opcodes?
5091  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
5092  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5093  return SDValue();
5094  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5095  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5096  return DAG.getNode(HandOpcode, DL, VT, Logic);
5097  }
5098 
5099  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5100  if (HandOpcode == ISD::TRUNCATE) {
5101  // If both operands have other uses, this transform would create extra
5102  // instructions without eliminating anything.
5103  if (!N0.hasOneUse() && !N1.hasOneUse())
5104  return SDValue();
5105  // We need matching source types.
5106  if (XVT != Y.getValueType())
5107  return SDValue();
5108  // Don't create an illegal op during or after legalization.
5109  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5110  return SDValue();
5111  // Be extra careful sinking truncate. If it's free, there's no benefit in
5112  // widening a binop. Also, don't create a logic op on an illegal type.
5113  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5114  return SDValue();
5115  if (!TLI.isTypeLegal(XVT))
5116  return SDValue();
5117  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5118  return DAG.getNode(HandOpcode, DL, VT, Logic);
5119  }
5120 
5121  // For binops SHL/SRL/SRA/AND:
5122  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5123  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5124  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5125  N0.getOperand(1) == N1.getOperand(1)) {
5126  // If either operand has other uses, this transform is not an improvement.
5127  if (!N0.hasOneUse() || !N1.hasOneUse())
5128  return SDValue();
5129  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5130  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5131  }
5132 
5133  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5134  if (HandOpcode == ISD::BSWAP) {
5135  // If either operand has other uses, this transform is not an improvement.
5136  if (!N0.hasOneUse() || !N1.hasOneUse())
5137  return SDValue();
5138  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5139  return DAG.getNode(HandOpcode, DL, VT, Logic);
5140  }
5141 
5142  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5143  // Only perform this optimization up until type legalization, before
5144  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
5145  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5146  // we don't want to undo this promotion.
5147  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5148  // on scalars.
5149  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5151  // Input types must be integer and the same.
5152  if (XVT.isInteger() && XVT == Y.getValueType() &&
5153  !(VT.isVector() && TLI.isTypeLegal(VT) &&
5154  !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5155  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5156  return DAG.getNode(HandOpcode, DL, VT, Logic);
5157  }
5158  }
5159 
5160  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5161  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5162  // If both shuffles use the same mask, and both shuffle within a single
5163  // vector, then it is worthwhile to move the swizzle after the operation.
5164  // The type-legalizer generates this pattern when loading illegal
5165  // vector types from memory. In many cases this allows additional shuffle
5166  // optimizations.
5167  // There are other cases where moving the shuffle after the xor/and/or
5168  // is profitable even if shuffles don't perform a swizzle.
5169  // If both shuffles use the same mask, and both shuffles have the same first
5170  // or second operand, then it might still be profitable to move the shuffle
5171  // after the xor/and/or operation.
5172  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5173  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5174  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5175  assert(X.getValueType() == Y.getValueType() &&
5176  "Inputs to shuffles are not the same type");
5177 
5178  // Check that both shuffles use the same mask. The masks are known to be of
5179  // the same length because the result vector type is the same.
5180  // Check also that shuffles have only one use to avoid introducing extra
5181  // instructions.
5182  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5183  !SVN0->getMask().equals(SVN1->getMask()))
5184  return SDValue();
5185 
5186  // Don't try to fold this node if it requires introducing a
5187  // build vector of all zeros that might be illegal at this stage.
5188  SDValue ShOp = N0.getOperand(1);
5189  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5190  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5191 
5192  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5193  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5194  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5195  N0.getOperand(0), N1.getOperand(0));
5196  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5197  }
5198 
5199  // Don't try to fold this node if it requires introducing a
5200  // build vector of all zeros that might be illegal at this stage.
5201  ShOp = N0.getOperand(0);
5202  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5203  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5204 
5205  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5206  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5207  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5208  N1.getOperand(1));
5209  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5210  }
5211  }
5212 
5213  return SDValue();
5214 }
5215 
5216 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5217 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5218  const SDLoc &DL) {
5219  SDValue LL, LR, RL, RR, N0CC, N1CC;
5220  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5221  !isSetCCEquivalent(N1, RL, RR, N1CC))
5222  return SDValue();
5223 
5224  assert(N0.getValueType() == N1.getValueType() &&
5225  "Unexpected operand types for bitwise logic op");
5226  assert(LL.getValueType() == LR.getValueType() &&
5227  RL.getValueType() == RR.getValueType() &&
5228  "Unexpected operand types for setcc");
5229 
5230  // If we're here post-legalization or the logic op type is not i1, the logic
5231  // op type must match a setcc result type. Also, all folds require new
5232  // operations on the left and right operands, so those types must match.
5233  EVT VT = N0.getValueType();
5234  EVT OpVT = LL.getValueType();
5235  if (LegalOperations || VT.getScalarType() != MVT::i1)
5236  if (VT != getSetCCResultType(OpVT))
5237  return SDValue();
5238  if (OpVT != RL.getValueType())
5239  return SDValue();
5240 
5241  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5242  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5243  bool IsInteger = OpVT.isInteger();
5244  if (LR == RR && CC0 == CC1 && IsInteger) {
5245  bool IsZero = isNullOrNullSplat(LR);
5246  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5247 
5248  // All bits clear?
5249  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5250  // All sign bits clear?
5251  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5252  // Any bits set?
5253  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5254  // Any sign bits set?
5255  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5256 
5257  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5258  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5259  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5260  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5261  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5262  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5263  AddToWorklist(Or.getNode());
5264  return DAG.getSetCC(DL, VT, Or, LR, CC1);
5265  }
5266 
5267  // All bits set?
5268  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5269  // All sign bits set?
5270  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5271  // Any bits clear?
5272  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5273  // Any sign bits clear?
5274  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5275 
5276  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5277  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5278  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5279  // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5280  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5281  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5282  AddToWorklist(And.getNode());
5283  return DAG.getSetCC(DL, VT, And, LR, CC1);
5284  }
5285  }
5286 
5287  // TODO: What is the 'or' equivalent of this fold?
5288  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5289  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5290  IsInteger && CC0 == ISD::SETNE &&
5291  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5292  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5293  SDValue One = DAG.getConstant(1, DL, OpVT);
5294  SDValue Two = DAG.getConstant(2, DL, OpVT);
5295  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5296  AddToWorklist(Add.getNode());
5297  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5298  }
5299 
5300  // Try more general transforms if the predicates match and the only user of
5301  // the compares is the 'and' or 'or'.
5302  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5303  N0.hasOneUse() && N1.hasOneUse()) {
5304  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5305  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5306  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5307  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5308  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5309  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5310  SDValue Zero = DAG.getConstant(0, DL, OpVT);
5311  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5312  }
5313 
5314  // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5315  // TODO - support non-uniform vector amounts.
5316  if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5317  // Match a shared variable operand and 2 non-opaque constant operands.
5320  if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5321  const APInt &CMax =
5323  const APInt &CMin =
5325  // The difference of the constants must be a single bit.
5326  if ((CMax - CMin).isPowerOf2()) {
5327  // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5328  // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5329  SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5330  SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5331  SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5332  SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5333  SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5334  SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5335  SDValue Zero = DAG.getConstant(0, DL, OpVT);
5336  return DAG.getSetCC(DL, VT, And, Zero, CC0);
5337  }
5338  }
5339  }
5340  }
5341 
5342  // Canonicalize equivalent operands to LL == RL.
5343  if (LL == RR && LR == RL) {
5344  CC1 = ISD::getSetCCSwappedOperands(CC1);
5345  std::swap(RL, RR);
5346  }
5347 
5348  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5349  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5350  if (LL == RL && LR == RR) {
5351  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5352  : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5353  if (NewCC != ISD::SETCC_INVALID &&
5354  (!LegalOperations ||
5355  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5356  TLI.isOperationLegal(ISD::SETCC, OpVT))))
5357  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5358  }
5359 
5360  return SDValue();
5361 }
5362 
5363 /// This contains all DAGCombine rules which reduce two values combined by
5364 /// an And operation to a single value. This makes them reusable in the context
5365 /// of visitSELECT(). Rules involving constants are not included as
5366 /// visitSELECT() already handles those cases.
5367 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5368  EVT VT = N1.getValueType();
5369  SDLoc DL(N);
5370 
5371  // fold (and x, undef) -> 0
5372  if (N0.isUndef() || N1.isUndef())
5373  return DAG.getConstant(0, DL, VT);
5374 
5375  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5376  return V;
5377 
5378  // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5379  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5380  VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5381  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5382  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5383  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5384  // immediate for an add, but it is legal if its top c2 bits are set,
5385  // transform the ADD so the immediate doesn't need to be materialized
5386  // in a register.
5387  APInt ADDC = ADDI->getAPIntValue();
5388  APInt SRLC = SRLI->getAPIntValue();
5389  if (ADDC.getMinSignedBits() <= 64 &&
5390  SRLC.ult(VT.getSizeInBits()) &&
5391  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5393  SRLC.getZExtValue());
5394  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5395  ADDC |= Mask;
5396  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5397  SDLoc DL0(N0);
5398  SDValue NewAdd =
5399  DAG.getNode(ISD::ADD, DL0, VT,
5400  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5401  CombineTo(N0.getNode(), NewAdd);
5402  // Return N so it doesn't get rechecked!
5403  return SDValue(N, 0);
5404  }
5405  }
5406  }
5407  }
5408  }
5409  }
5410 
5411  // Reduce bit extract of low half of an integer to the narrower type.
5412  // (and (srl i64:x, K), KMask) ->
5413  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
5414  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5415  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5416  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5417  unsigned Size = VT.getSizeInBits();
5418  const APInt &AndMask = CAnd->getAPIntValue();
5419  unsigned ShiftBits = CShift->getZExtValue();
5420 
5421  // Bail out, this node will probably disappear anyway.
5422  if (ShiftBits == 0)
5423  return SDValue();
5424 
5425  unsigned MaskBits = AndMask.countTrailingOnes();
5426  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5427 
5428  if (AndMask.isMask() &&
5429  // Required bits must not span the two halves of the integer and
5430  // must fit in the half size type.
5431  (ShiftBits + MaskBits <= Size / 2) &&
5432  TLI.isNarrowingProfitable(VT, HalfVT) &&
5433  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5434  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5435  TLI.isTruncateFree(VT, HalfVT) &&
5436  TLI.isZExtFree(HalfVT, VT)) {
5437  // The isNarrowingProfitable is to avoid regressions on PPC and
5438  // AArch64 which match a few 64-bit bit insert / bit extract patterns
5439  // on downstream users of this. Those patterns could probably be
5440  // extended to handle extensions mixed in.
5441 
5442  SDValue SL(N0);
5443  assert(MaskBits <= Size);
5444 
5445  // Extracting the highest bit of the low half.
5446  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5447  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5448  N0.getOperand(0));
5449 
5450  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5451  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5452  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5453  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5454  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5455  }
5456  }
5457  }
5458  }
5459 
5460  return SDValue();
5461 }
5462 
5463 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5464  EVT LoadResultTy, EVT &ExtVT) {
5465  if (!AndC->getAPIntValue().isMask())
5466  return false;
5467 
5468  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5469 
5470  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5471  EVT LoadedVT = LoadN->getMemoryVT();
5472 
5473  if (ExtVT == LoadedVT &&
5474  (!LegalOperations ||
5475  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5476  // ZEXTLOAD will match without needing to change the size of the value being
5477  // loaded.
5478  return true;
5479  }
5480 
5481  // Do not change the width of a volatile or atomic loads.
5482  if (!LoadN->isSimple())
5483  return false;
5484 
5485  // Do not generate loads of non-round integer types since these can
5486  // be expensive (and would be wrong if the type is not byte sized).
5487  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5488  return false;
5489 
5490  if (LegalOperations &&
5491  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5492  return false;
5493 
5494  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5495  return false;
5496 
5497  return true;
5498 }
5499 
5500 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5501  ISD::LoadExtType ExtType, EVT &MemVT,
5502  unsigned ShAmt) {
5503  if (!LDST)
5504  return false;
5505  // Only allow byte offsets.
5506  if (ShAmt % 8)
5507  return false;
5508 
5509  // Do not generate loads of non-round integer types since these can
5510  // be expensive (and would be wrong if the type is not byte sized).
5511  if (!MemVT.isRound())
5512  return false;
5513 
5514  // Don't change the width of a volatile or atomic loads.
5515  if (!LDST->isSimple())
5516  return false;
5517 
5518  EVT LdStMemVT = LDST->getMemoryVT();
5519 
5520  // Bail out when changing the scalable property, since we can't be sure that
5521  // we're actually narrowing here.
5522  if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5523  return false;
5524 
5525  // Verify that we are actually reducing a load width here.
5526  if (LdStMemVT.bitsLT(MemVT))
5527  return false;
5528 
5529  // Ensure that this isn't going to produce an unsupported memory access.
5530  if (ShAmt) {
5531  assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5532  const unsigned ByteShAmt = ShAmt / 8;
5533  const Align LDSTAlign = LDST->getAlign();
5534  const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5535  if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5536  LDST->getAddressSpace(), NarrowAlign,
5537  LDST->getMemOperand()->getFlags()))
5538  return false;
5539  }
5540 
5541  // It's not possible to generate a constant of extended or untyped type.
5542  EVT PtrType = LDST->getBasePtr().getValueType();
5543  if (PtrType == MVT::Untyped || PtrType.isExtended())
5544  return false;
5545 
5546  if (isa<LoadSDNode>(LDST)) {
5547  LoadSDNode *Load = cast<LoadSDNode>(LDST);
5548  // Don't transform one with multiple uses, this would require adding a new
5549  // load.
5550  if (!SDValue(Load, 0).hasOneUse())
5551  return false;
5552 
5553  if (LegalOperations &&
5554  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5555  return false;
5556 
5557  // For the transform to be legal, the load must produce only two values
5558  // (the value loaded and the chain). Don't transform a pre-increment
5559  // load, for example, which produces an extra value. Otherwise the
5560  // transformation is not equivalent, and the downstream logic to replace
5561  // uses gets things wrong.
5562  if (Load->getNumValues() > 2)
5563  return false;
5564 
5565  // If the load that we're shrinking is an extload and we're not just
5566  // discarding the extension we can't simply shrink the load. Bail.
5567  // TODO: It would be possible to merge the extensions in some cases.
5568  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5569  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5570  return false;
5571 
5572  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5573  return false;
5574  } else {
5575  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5576  StoreSDNode *Store = cast<StoreSDNode>(LDST);
5577  // Can't write outside the original store
5578  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5579  return false;
5580 
5581  if (LegalOperations &&
5582  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5583  return false;
5584  }
5585  return true;
5586 }
5587 
5588 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5590  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5592  SDNode *&NodeToMask) {
5593  // Recursively search for the operands, looking for loads which can be
5594  // narrowed.
5595  for (SDValue Op : N->op_values()) {
5596  if (Op.getValueType().isVector())
5597  return false;
5598 
5599  // Some constants may need fixing up later if they are too large.
5600  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5601  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5602  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5603  NodesWithConsts.insert(N);
5604  continue;
5605  }
5606 
5607  if (!Op.hasOneUse())
5608  return false;
5609 
5610  switch(Op.getOpcode()) {
5611  case ISD::LOAD: {
5612  auto *Load = cast<LoadSDNode>(Op);
5613  EVT ExtVT;
5614  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5615  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5616 
5617  // ZEXTLOAD is already small enough.
5618  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5619  ExtVT.bitsGE(Load->getMemoryVT()))
5620  continue;
5621 
5622  // Use LE to convert equal sized loads to zext.
5623  if (ExtVT.bitsLE(Load->getMemoryVT()))
5624  Loads.push_back(Load);
5625 
5626  continue;
5627  }
5628  return false;
5629  }
5630  case ISD::ZERO_EXTEND:
5631  case ISD::AssertZext: {
5632  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5633  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5634  EVT VT = Op.getOpcode() == ISD::AssertZext ?
5635  cast<VTSDNode>(Op.getOperand(1))->getVT() :
5636  Op.getOperand(0).getValueType();
5637 
5638  // We can accept extending nodes if the mask is wider or an equal
5639  // width to the original type.
5640  if (ExtVT.bitsGE(VT))
5641  continue;
5642  break;
5643  }
5644  case ISD::OR:
5645  case ISD::XOR:
5646  case ISD::AND:
5647  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5648  NodeToMask))
5649  return false;
5650  continue;
5651  }
5652 
5653  // Allow one node which will masked along with any loads found.
5654  if (NodeToMask)
5655  return false;
5656 
5657  // Also ensure that the node to be masked only produces one data result.
5658  NodeToMask = Op.getNode();
5659  if (NodeToMask->getNumValues() > 1) {
5660  bool HasValue = false;
5661  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5662  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5663  if (VT != MVT::Glue && VT != MVT::Other) {
5664  if (HasValue) {
5665  NodeToMask = nullptr;
5666  return false;
5667  }
5668  HasValue = true;
5669  }
5670  }
5671  assert(HasValue && "Node to be masked has no data result?");
5672  }
5673  }
5674  return true;
5675 }
5676 
5677 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5678  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5679  if (!Mask)
5680  return false;
5681 
5682  if (!Mask->getAPIntValue().isMask())
5683  return false;
5684 
5685  // No need to do anything if the and directly uses a load.
5686  if (isa<LoadSDNode>(N->getOperand(0)))
5687  return false;
5688 
5690  SmallPtrSet<SDNode*, 2> NodesWithConsts;
5691  SDNode *FixupNode = nullptr;
5692  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5693  if (Loads.size() == 0)
5694  return false;
5695 
5696  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5697  SDValue MaskOp = N->getOperand(1);
5698 
5699  // If it exists, fixup the single node we allow in the tree that needs
5700  // masking.
5701  if (FixupNode) {
5702  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5703  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5704  FixupNode->getValueType(0),
5705  SDValue(FixupNode, 0), MaskOp);
5706  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5707  if (And.getOpcode() == ISD ::AND)
5708  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5709  }
5710 
5711  // Narrow any constants that need it.
5712  for (auto *LogicN : NodesWithConsts) {
5713  SDValue Op0 = LogicN->getOperand(0);
5714  SDValue Op1 = LogicN->getOperand(1);
5715 
5716  if (isa<ConstantSDNode>(Op0))
5717  std::swap(Op0, Op1);
5718 
5719  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5720  Op1, MaskOp);
5721 
5722  DAG.UpdateNodeOperands(LogicN, Op0, And);
5723  }
5724 
5725  // Create narrow loads.
5726  for (auto *Load : Loads) {
5727  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5728  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5729  SDValue(Load, 0), MaskOp);
5730  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5731  if (And.getOpcode() == ISD ::AND)
5732  And = SDValue(
5733  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5734  SDValue NewLoad = reduceLoadWidth(And.getNode());
5735  assert(NewLoad &&
5736  "Shouldn't be masking the load if it can't be narrowed");
5737  CombineTo(Load, NewLoad, NewLoad.getValue(1));
5738  }
5739  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5740  return true;
5741  }
5742  return false;
5743 }
5744 
5745 // Unfold
5746 // x & (-1 'logical shift' y)
5747 // To
5748 // (x 'opposite logical shift' y) 'logical shift' y
5749 // if it is better for performance.
5750 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5751  assert(N->getOpcode() == ISD::AND);
5752 
5753  SDValue N0 = N->getOperand(0);
5754  SDValue N1 = N->getOperand(1);
5755 
5756  // Do we actually prefer shifts over mask?
5757  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5758  return SDValue();
5759 
5760  // Try to match (-1 '[outer] logical shift' y)
5761  unsigned OuterShift;
5762  unsigned InnerShift; // The opposite direction to the OuterShift.
5763  SDValue Y; // Shift amount.
5764  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5765  if (!M.hasOneUse())
5766  return false;
5767  OuterShift = M->getOpcode();
5768  if (OuterShift == ISD::SHL)
5769  InnerShift = ISD::SRL;
5770  else if (OuterShift == ISD::SRL)
5771  InnerShift = ISD::SHL;
5772  else
5773  return false;
5774  if (!isAllOnesConstant(M->getOperand(0)))
5775  return false;
5776  Y = M->getOperand(1);
5777  return true;
5778  };
5779 
5780  SDValue X;
5781  if (matchMask(N1))
5782  X = N0;
5783  else if (matchMask(N0))
5784  X = N1;
5785  else
5786  return SDValue();
5787 
5788  SDLoc DL(N);
5789  EVT VT = N->getValueType(0);
5790 
5791  // tmp = x 'opposite logical shift' y
5792  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5793  // ret = tmp 'logical shift' y
5794  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5795 
5796  return T1;
5797 }
5798 
5799 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5800 /// For a target with a bit test, this is expected to become test + set and save
5801 /// at least 1 instruction.
5803  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5804 
5805  // This is probably not worthwhile without a supported type.
5806  EVT VT = And->getValueType(0);
5807  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5808  if (!TLI.isTypeLegal(VT))
5809  return SDValue();
5810 
5811  // Look through an optional extension and find a 'not'.
5812  // TODO: Should we favor test+set even without the 'not' op?
5813  SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5814  if (Not.getOpcode() == ISD::ANY_EXTEND)
5815  Not = Not.getOperand(0);
5816  if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5817  return SDValue();
5818 
5819  // Look though an optional truncation. The source operand may not be the same
5820  // type as the original 'and', but that is ok because we are masking off
5821  // everything but the low bit.
5822  SDValue Srl = Not.getOperand(0);
5823  if (Srl.getOpcode() == ISD::TRUNCATE)
5824  Srl = Srl.getOperand(0);
5825 
5826  // Match a shift-right by constant.
5827  if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5828  !isa<ConstantSDNode>(Srl.getOperand(1)))
5829  return SDValue();
5830 
5831  // We might have looked through casts that make this transform invalid.
5832  // TODO: If the source type is wider than the result type, do the mask and
5833  // compare in the source type.
5834  const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5835  unsigned VTBitWidth = VT.getSizeInBits();
5836  if (ShiftAmt.uge(VTBitWidth))
5837  return SDValue();
5838 
5839  // Turn this into a bit-test pattern using mask op + setcc:
5840  // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5841  SDLoc DL(And);
5842  SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5843  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5844  SDValue Mask = DAG.getConstant(
5845  APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5846  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5847  SDValue Zero = DAG.getConstant(0, DL, VT);
5848  SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5849  return DAG.getZExtOrTrunc(Setcc, DL, VT);
5850 }
5851 
5852 /// For targets that support usubsat, match a bit-hack form of that operation
5853 /// that ends in 'and' and convert it.
5855  SDValue N0 = N->getOperand(0);
5856  SDValue N1 = N->getOperand(1);
5857  EVT VT = N1.getValueType();
5858 
5859  // Canonicalize SRA as operand 1.
5860  if (N0.getOpcode() == ISD::SRA)
5861  std::swap(N0, N1);
5862 
5863  // xor/add with SMIN (signmask) are logically equivalent.
5864  if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
5865  return SDValue();
5866 
5867  if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
5868  N0.getOperand(0) != N1.getOperand(0))
5869  return SDValue();
5870 
5871  unsigned BitWidth = VT.getScalarSizeInBits();
5872  ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
5873  ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
5874  if (!XorC || !XorC->getAPIntValue().isSignMask() ||
5875  !SraC || SraC->getAPIntValue() != BitWidth - 1)
5876  return SDValue();
5877 
5878  // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
5879  // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
5880  SDLoc DL(N);
5881  SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
5882  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
5883 }
5884 
5885 SDValue DAGCombiner::visitAND(SDNode *N) {
5886  SDValue N0 = N->getOperand(0);
5887  SDValue N1 = N->getOperand(1);
5888  EVT VT = N1.getValueType();
5889 
5890  // x & x --> x
5891  if (N0 == N1)
5892  return N0;
5893 
5894  // fold (and c1, c2) -> c1&c2
5895  if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5896  return C;
5897 
5898  // canonicalize constant to RHS
5901  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5902 
5903  // fold vector ops
5904  if (VT.isVector()) {
5905  if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
5906  return FoldedVOp;
5907 
5908  // fold (and x, 0) -> 0, vector edition
5910  // do not return N1, because undef node may exist in N1
5912  SDLoc(N), N1.getValueType());
5913 
5914  // fold (and x, -1) -> x, vector edition
5916  return N0;
5917 
5918  // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5919  auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5920  auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5921  if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5922  N0.hasOneUse() && N1.hasOneUse()) {
5923  EVT LoadVT = MLoad->getMemoryVT();
5924  EVT ExtVT = VT;
5925  if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5926  // For this AND to be a zero extension of the masked load the elements
5927  // of the BuildVec must mask the bottom bits of the extended element
5928  // type
5929  if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5930  uint64_t ElementSize =
5932  if (Splat->getAPIntValue().isMask(ElementSize)) {
5933  return DAG.getMaskedLoad(
5934  ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5935  MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5936  LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5937  ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5938  }
5939  }
5940  }
5941  }
5942  }
5943 
5944  // fold (and x, -1) -> x
5945  if (isAllOnesConstant(N1))
5946  return N0;
5947 
5948  // if (and x, c) is known to be zero, return 0
5949  unsigned BitWidth = VT.getScalarSizeInBits();
5951  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
5952  return DAG.getConstant(0, SDLoc(N), VT);
5953 
5954  if (SDValue NewSel = foldBinOpIntoSelect(N))
5955  return NewSel;
5956 
5957  // reassociate and
5958  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5959  return RAND;
5960 
5961  // Try to convert a constant mask AND into a shuffle clear mask.
5962  if (VT.isVector())
5963  if (SDValue Shuffle = XformToShuffleWithZero(N))
5964  return Shuffle;
5965 
5966  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
5967  return Combined;
5968 
5969  // fold (and (or x, C), D) -> D if (C & D) == D
5970  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5971  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5972  };
5973  if (N0.getOpcode() == ISD::OR &&
5974  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5975  return N1;
5976  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5977  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5978  SDValue N0Op0 = N0.getOperand(0);
5979  APInt Mask = ~N1C->getAPIntValue();
5980  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5981  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5982  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5983  N0.getValueType(), N0Op0);
5984 
5985  // Replace uses of the AND with uses of the Zero extend node.
5986  CombineTo(N, Zext);
5987 
5988  // We actually want to replace all uses of the any_extend with the
5989  // zero_extend, to avoid duplicating things. This will later cause this
5990  // AND to be folded.
5991  CombineTo(N0.getNode(), Zext);
5992  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5993  }
5994  }
5995 
5996  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5997  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5998  // already be zero by virtue of the width of the base type of the load.
5999  //
6000  // the 'X' node here can either be nothing or an extract_vector_elt to catch
6001  // more cases.
6002  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6004  N0.getOperand(0).getOpcode() == ISD::LOAD &&
6005  N0.getOperand(0).getResNo() == 0) ||
6006  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6007  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
6008  N0 : N0.getOperand(0) );
6009 
6010  // Get the constant (if applicable) the zero'th operand is being ANDed with.
6011  // This can be a pure constant or a vector splat, in which case we treat the
6012  // vector as a scalar and use the splat value.
6014  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
6015  Constant = C->getAPIntValue();
6016  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6017  APInt SplatValue, SplatUndef;
6018  unsigned SplatBitSize;
6019  bool HasAnyUndefs;
6020  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
6021  SplatBitSize, HasAnyUndefs);
6022  if (IsSplat) {
6023  // Undef bits can contribute to a possible optimisation if set, so
6024  // set them.
6025  SplatValue |= SplatUndef;
6026 
6027  // The splat value may be something like "0x00FFFFFF", which means 0 for
6028  // the first vector value and FF for the rest, repeating. We need a mask
6029  // that will apply equally to all members of the vector, so AND all the
6030  // lanes of the constant together.
6031  unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6032 
6033  // If the splat value has been compressed to a bitlength lower
6034  // than the size of the vector lane, we need to re-expand it to
6035  // the lane size.
6036  if (EltBitWidth > SplatBitSize)
6037  for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
6038  SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
6039  SplatValue |= SplatValue.shl(SplatBitSize);
6040 
6041  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6042  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
6043  if ((SplatBitSize % EltBitWidth) == 0) {
6044  Constant = APInt::getAllOnes(EltBitWidth);
6045  for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6046  Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6047  }
6048  }
6049  }
6050 
6051  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
6052  // actually legal and isn't going to get expanded, else this is a false
6053  // optimisation.
6054  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
6055  Load->getValueType(0),
6056  Load->getMemoryVT());
6057 
6058  // Resize the constant to the same size as the original memory access before
6059  // extension. If it is still the AllOnesValue then this AND is completely
6060  // unneeded.
6061  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
6062 
6063  bool B;
6064  switch (Load->getExtensionType()) {
6065  default: B = false; break;
6066  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
6067  case ISD::ZEXTLOAD:
6068  case ISD::NON_EXTLOAD: B = true; break;
6069  }
6070 
6071  if (B && Constant.isAllOnes()) {
6072  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
6073  // preserve semantics once we get rid of the AND.
6074  SDValue NewLoad(Load, 0);
6075 
6076  // Fold the AND away. NewLoad may get replaced immediately.
6077  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
6078 
6079  if (Load->getExtensionType() == ISD::EXTLOAD) {
6080  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
6081  Load->getValueType(0), SDLoc(Load),
6082  Load->getChain(), Load->getBasePtr(),
6083  Load->getOffset(), Load->getMemoryVT(),
6084  Load->getMemOperand());
6085  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6086  if (Load->getNumValues() == 3) {
6087  // PRE/POST_INC loads have 3 values.
6088  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6089  NewLoad.getValue(2) };
6090  CombineTo(Load, To, 3, true);
6091  } else {
6092  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6093  }
6094  }
6095 
6096  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6097  }
6098  }
6099 
6100  // fold (and (masked_gather x)) -> (zext_masked_gather x)
6101  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6102  EVT MemVT = GN0->getMemoryVT();
6103  EVT ScalarVT = MemVT.getScalarType();
6104 
6105  if (SDValue(GN0, 0).hasOneUse() &&
6106  isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
6107  TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
6108  SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
6109  GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
6110 
6111  SDValue ZExtLoad = DAG.getMaskedGather(
6112  DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6113  GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6114 
6115  CombineTo(N, ZExtLoad);
6116  AddToWorklist(ZExtLoad.getNode());
6117  // Avoid recheck of N.
6118  return SDValue(N, 0);
6119  }
6120  }
6121 
6122  // fold (and (load x), 255) -> (zextload x, i8)
6123  // fold (and (extload x, i16), 255) -> (zextload x, i8)
6124  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
6125  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
6126  (N0.getOpcode() == ISD::ANY_EXTEND &&
6127  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
6128  if (SDValue Res = reduceLoadWidth(N)) {
6129  LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
6130  ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
6131  AddToWorklist(N);
6132  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
6133  return SDValue(N, 0);
6134  }
6135  }
6136 
6137  if (LegalTypes) {
6138  // Attempt to propagate the AND back up to the leaves which, if they're
6139  // loads, can be combined to narrow loads and the AND node can be removed.
6140  // Perform after legalization so that extend nodes will already be
6141  // combined into the loads.
6142  if (BackwardsPropagateMask(N))
6143  return SDValue(N, 0);
6144  }
6145 
6146  if (SDValue Combined = visitANDLike(N0, N1, N))
6147  return Combined;
6148 
6149  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
6150  if (N0.getOpcode() == N1.getOpcode())
6151  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6152  return V;
6153 
6154  // Masking the negated extension of a boolean is just the zero-extended
6155  // boolean:
6156  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6157  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
6158  //
6159  // Note: the SimplifyDemandedBits fold below can make an information-losing
6160  // transform, and then we have no way to find this better fold.
6161  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6162  if (isNullOrNullSplat(N0.getOperand(0))) {
6163  SDValue SubRHS = N0.getOperand(1);
6164  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6165  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6166  return SubRHS;
6167  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6168  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6169  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6170  }
6171  }
6172 
6173  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6174  // fold (and (sra)) -> (and (srl)) when possible.
6175  if (SimplifyDemandedBits(SDValue(N, 0)))
6176  return SDValue(N, 0);
6177 
6178  // fold (zext_inreg (extload x)) -> (zextload x)
6179  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6180  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6181  (ISD::isEXTLoad(N0.getNode()) ||
6182  (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6183  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6184  EVT MemVT = LN0->getMemoryVT();
6185  // If we zero all the possible extended bits, then we can turn this into
6186  // a zextload if we are running before legalize or the operation is legal.
6187  unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6188  unsigned MemBitSize = MemVT.getScalarSizeInBits();
6189  APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6190  if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6191  ((!LegalOperations && LN0->isSimple()) ||
6192  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6193  SDValue ExtLoad =
6194  DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6195  LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6196  AddToWorklist(N);
6197  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6198  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6199  }
6200  }
6201 
6202  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6203  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6204  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6205  N0.getOperand(1), false))
6206  return BSwap;
6207  }
6208 
6209  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6210  return Shifts;
6211 
6212  if (TLI.hasBitTest(N0, N1))
6213  if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6214  return V;
6215 
6216  // Recognize the following pattern:
6217  //
6218  // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6219  //
6220  // where bitmask is a mask that clears the upper bits of AndVT. The
6221  // number of bits in bitmask must be a power of two.
6222  auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6223  if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6224  return false;
6225 
6226  auto *C = dyn_cast<ConstantSDNode>(RHS);
6227  if (!C)
6228  return false;
6229 
6230  if (!C->getAPIntValue().isMask(
6231  LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6232  return false;
6233 
6234  return true;
6235  };
6236 
6237  // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6238  if (IsAndZeroExtMask(N0, N1))
6239  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6240 
6241  if (hasOperation(ISD::USUBSAT, VT))
6242  if (SDValue V = foldAndToUsubsat(N, DAG))
6243  return V;
6244 
6245  return SDValue();
6246 }
6247 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// Recognizes the low-halfword byte swap of \p N0 | \p N1, optionally looking
/// through AND masks on either side. \p DemandHighBits is false when the
/// caller only cares about the low 16 bits of the result, which relaxes the
/// masking requirements.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // This transform emits an ISD::BSWAP node, so only run it once operation
  // legality is known.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // Canonicalize so that an AND-of-SHL (if any) ends up in N0 and an
  // AND-of-SRL in N1; LookPassAnd* record that a mask was peeled off.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After mask stripping, we must be left with a SHL-by-8 in N0 and a
  // SRL-by-8 in N1 (commute if needed).
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  // i.e. the masks may also sit *inside* the shifts, if they weren't already
  // consumed above.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must byte-swap the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Build (bswap a), then shift the swapped halfword down into the low 16
  // bits for wider types.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
6360 
6361 /// Return true if the specified node is an element that makes up a 32-bit
6362 /// packed halfword byteswap.
6363 /// ((x & 0x000000ff) << 8) |
6364 /// ((x & 0x0000ff00) >> 8) |
6365 /// ((x & 0x00ff0000) << 8) |
6366 /// ((x & 0xff000000) >> 8)
6368  if (!N.getNode()->hasOneUse())
6369  return false;
6370 
6371  unsigned Opc = N.getOpcode();
6372  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6373  return false;
6374 
6375  SDValue N0 = N.getOperand(0);
6376  unsigned Opc0 = N0.getOpcode();
6377  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6378  return false;
6379 
6380  ConstantSDNode *N1C = nullptr;
6381  // SHL or SRL: look upstream for AND mask operand
6382  if (Opc == ISD::AND)
6383  N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6384  else if (Opc0 == ISD::AND)
6385  N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6386  if (!N1C)
6387  return false;
6388 
6389  unsigned MaskByteOffset;
6390  switch (N1C->getZExtValue()) {
6391  default:
6392  return false;
6393  case 0xFF: MaskByteOffset = 0; break;
6394  case 0xFF00: MaskByteOffset = 1; break;
6395  case 0xFFFF:
6396  // In case demanded bits didn't clear the bits that will be shifted out.
6397  // This is needed for X86.
6398  if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6399  MaskByteOffset = 1;
6400  break;
6401  }
6402  return false;
6403  case 0xFF0000: MaskByteOffset = 2; break;
6404  case 0xFF000000: MaskByteOffset = 3; break;
6405  }
6406 
6407  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6408  if (Opc == ISD::AND) {
6409  if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6410  // (x >> 8) & 0xff
6411  // (x >> 8) & 0xff0000
6412  if (Opc0 != ISD::SRL)
6413  return false;
6414  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6415  if (!C || C->getZExtValue() != 8)
6416  return false;
6417  } else {
6418  // (x << 8) & 0xff00
6419  // (x << 8) & 0xff000000
6420  if (Opc0 != ISD::SHL)
6421  return false;
6422  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6423  if (!C || C->getZExtValue() != 8)
6424  return false;
6425  }
6426  } else if (Opc == ISD::SHL) {
6427  // (x & 0xff) << 8
6428  // (x & 0xff0000) << 8
6429  if (MaskByteOffset != 0 && MaskByteOffset != 2)
6430  return false;
6431  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6432  if (!C || C->getZExtValue() != 8)
6433  return false;
6434  } else { // Opc == ISD::SRL
6435  // (x & 0xff00) >> 8
6436  // (x & 0xff000000) >> 8
6437  if (MaskByteOffset != 1 && MaskByteOffset != 3)
6438  return false;
6439  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6440  if (!C || C->getZExtValue() != 8)
6441  return false;
6442  }
6443 
6444  if (Parts[MaskByteOffset])
6445  return false;
6446 
6447  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6448  return true;
6449 }
6450 
6451 // Match 2 elements of a packed halfword bswap.
6453  if (N.getOpcode() == ISD::OR)
6454  return isBSwapHWordElement(N.getOperand(0), Parts) &&
6455  isBSwapHWordElement(N.getOperand(1), Parts);
6456 
6457  if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6458  ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6459  if (!C || C->getAPIntValue() != 16)
6460  return false;
6461  Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6462  return true;
6463  }
6464 
6465  return false;
6466 }
6467 
6468 // Match this pattern:
6469 // (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6470 // And rewrite this to:
6471 // (rotr (bswap A), 16)
6473  SelectionDAG &DAG, SDNode *N, SDValue N0,
6474  SDValue N1, EVT VT, EVT ShiftAmountTy) {
6475  assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6476  "MatchBSwapHWordOrAndAnd: expecting i32");
6477  if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6478  return SDValue();
6479  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6480  return SDValue();
6481  // TODO: this is too restrictive; lifting this restriction requires more tests
6482  if (!N0->hasOneUse() || !N1->hasOneUse())
6483  return SDValue();
6486  if (!Mask0 || !Mask1)
6487  return SDValue();
6488  if (Mask0->getAPIntValue() != 0xff00ff00 ||
6489  Mask1->getAPIntValue() != 0x00ff00ff)
6490  return SDValue();
6491  SDValue Shift0 = N0.getOperand(0);
6492  SDValue Shift1 = N1.getOperand(0);
6493  if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6494  return SDValue();
6495  ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6496  ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6497  if (!ShiftAmt0 || !ShiftAmt1)
6498  return SDValue();
6499  if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6500  return SDValue();
6501  if (Shift0.getOperand(0) != Shift1.getOperand(0))
6502  return SDValue();
6503 
6504  SDLoc DL(N);
6505  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6506  SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6507  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6508 }
6509 
6510 /// Match a 32-bit packed halfword bswap. That is
6511 /// ((x & 0x000000ff) << 8) |
6512 /// ((x & 0x0000ff00) >> 8) |
6513 /// ((x & 0x00ff0000) << 8) |
6514 /// ((x & 0xff000000) >> 8)
6515 /// => (rotl (bswap x), 16)
6516 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6517  if (!LegalOperations)
6518  return SDValue();
6519 
6520  EVT VT = N->getValueType(0);
6521  if (VT != MVT::i32)
6522  return SDValue();
6523  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6524  return SDValue();
6525 
6526  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6527  getShiftAmountTy(VT)))
6528  return BSwap;
6529 
6530  // Try again with commuted operands.
6531  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6532  getShiftAmountTy(VT)))
6533  return BSwap;
6534 
6535 
6536  // Look for either
6537  // (or (bswaphpair), (bswaphpair))
6538  // (or (or (bswaphpair), (and)), (and))
6539  // (or (or (and), (bswaphpair)), (and))
6540  SDNode *Parts[4] = {};
6541 
6542  if (isBSwapHWordPair(N0, Parts)) {
6543  // (or (or (and), (and)), (or (and), (and)))
6544  if (!isBSwapHWordPair(N1, Parts))
6545  return SDValue();
6546  } else if (N0.getOpcode() == ISD::OR) {
6547  // (or (or (or (and), (and)), (and)), (and))
6548  if (!isBSwapHWordElement(N1, Parts))
6549  return SDValue();
6550  SDValue N00 = N0.getOperand(0);
6551  SDValue N01 = N0.getOperand(1);
6552  if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6553  !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6554  return SDValue();
6555  } else
6556  return SDValue();
6557 
6558  // Make sure the parts are all coming from the same node.
6559  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6560  return SDValue();
6561 
6562  SDLoc DL(N);
6563  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6564  SDValue(Parts[0], 0));
6565 
6566  // Result of the bswap should be rotated by 16. If it's not legal, then
6567  // do (x << 16) | (x >> 16).
6568  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6569  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6570  return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6571  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6572  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6573  return DAG.getNode(ISD::OR, DL, VT,
6574  DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6575  DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6576 }
6577 
6578 /// This contains all DAGCombine rules which reduce two values combined by
6579 /// an Or operation to a single value \see visitANDLike().
6580 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6581  EVT VT = N1.getValueType();
6582  SDLoc DL(N);
6583 
6584  // fold (or x, undef) -> -1
6585  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6586  return DAG.getAllOnesConstant(DL, VT);
6587 
6588  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6589  return V;
6590 
6591  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6592  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6593  // Don't increase # computations.
6594  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6595  // We can only do this xform if we know that bits from X that are set in C2
6596  // but not in C1 are already zero. Likewise for Y.
6597  if (const ConstantSDNode *N0O1C =
6599  if (const ConstantSDNode *N1O1C =
6601  // We can only do this xform if we know that bits from X that are set in
6602  // C2 but not in C1 are already zero. Likewise for Y.
6603  const APInt &LHSMask = N0O1C->getAPIntValue();
6604  const APInt &RHSMask = N1O1C->getAPIntValue();
6605 
6606  if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6607  DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6608  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6609  N0.getOperand(0), N1.getOperand(0));
6610  return DAG.getNode(ISD::AND, DL, VT, X,
6611  DAG.getConstant(LHSMask | RHSMask, DL, VT));
6612  }
6613  }
6614  }
6615  }
6616 
6617  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6618  if (N0.getOpcode() == ISD::AND &&
6619  N1.getOpcode() == ISD::AND &&
6620  N0.getOperand(0) == N1.getOperand(0) &&
6621  // Don't increase # computations.
6622  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6623  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6624  N0.getOperand(1), N1.getOperand(1));
6625  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6626  }
6627 
6628  return SDValue();
6629 }
6630 
6631 /// OR combines for which the commuted variant will be tried as well.
6633  SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6634  EVT VT = N0.getValueType();
6635  if (N0.getOpcode() == ISD::AND) {
6636  // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6637  if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6638  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6639 
6640  // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6641  if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6642  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6643  }
6644 
6645  return SDValue();
6646 }
6647 
6648 SDValue DAGCombiner::visitOR(SDNode *N) {
6649  SDValue N0 = N->getOperand(0);
6650  SDValue N1 = N->getOperand(1);
6651  EVT VT = N1.getValueType();
6652 
6653  // x | x --> x
6654  if (N0 == N1)
6655  return N0;
6656 
6657  // fold (or c1, c2) -> c1|c2
6658  if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6659  return C;
6660 
6661  // canonicalize constant to RHS
6664  return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6665 
6666  // fold vector ops
6667  if (VT.isVector()) {
6668  if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6669  return FoldedVOp;
6670 
6671  // fold (or x, 0) -> x, vector edition
6673  return N0;
6674 
6675  // fold (or x, -1) -> -1, vector edition
6677  // do not return N1, because undef node may exist in N1
6678  return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6679 
6680  // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6681  // Do this only if the resulting shuffle is legal.
6682  if (isa<ShuffleVectorSDNode>(N0) &&
6683  isa<ShuffleVectorSDNode>(N1) &&
6684  // Avoid folding a node with illegal type.
6685  TLI.isTypeLegal(VT)) {
6686  bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6687  bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6688  bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6689  bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6690  // Ensure both shuffles have a zero input.
6691  if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6692  assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6693  assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6694  const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6695  const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6696  bool CanFold = true;
6697  int NumElts = VT.getVectorNumElements();
6698  SmallVector<int, 4> Mask(NumElts);
6699 
6700  for (int i = 0; i != NumElts; ++i) {
6701  int M0 = SV0->getMaskElt(i);
6702  int M1 = SV1->getMaskElt(i);
6703 
6704  // Determine if either index is pointing to a zero vector.
6705  bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6706  bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6707 
6708  // If one element is zero and the otherside is undef, keep undef.
6709  // This also handles the case that both are undef.
6710  if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6711  Mask[i] = -1;
6712  continue;
6713  }
6714 
6715  // Make sure only one of the elements is zero.
6716  if (M0Zero == M1Zero) {
6717  CanFold = false;
6718  break;
6719  }
6720 
6721  assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6722 
6723  // We have a zero and non-zero element. If the non-zero came from
6724  // SV0 make the index a LHS index. If it came from SV1, make it
6725  // a RHS index. We need to mod by NumElts because we don't care
6726  // which operand it came from in the original shuffles.
6727  Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6728  }
6729 
6730  if (CanFold) {
6731  SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6732  SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6733 
6734  SDValue LegalShuffle =
6735  TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6736  Mask, DAG);
6737  if (LegalShuffle)
6738  return LegalShuffle;
6739  }
6740  }
6741  }
6742  }
6743 
6744  // fold (or x, 0) -> x
6745  if (isNullConstant(N1))
6746  return N0;
6747 
6748  // fold (or x, -1) -> -1
6749  if (isAllOnesConstant(N1))
6750  return N1;
6751 
6752  if (SDValue NewSel = foldBinOpIntoSelect(N))
6753  return NewSel;
6754 
6755  // fold (or x, c) -> c iff (x & ~c) == 0
6756  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6757  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6758  return N1;
6759 
6760  if (SDValue Combined = visitORLike(N0, N1, N))
6761  return Combined;
6762 
6763  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6764  return Combined;
6765 
6766  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6767  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6768  return BSwap;
6769  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6770  return BSwap;
6771 
6772  // reassociate or
6773  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6774  return ROR;
6775 
6776  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6777  // iff (c1 & c2) != 0 or c1/c2 are undef.
6778  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6779  return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6780  };
6781  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6782  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6783  if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6784  {N1, N0.getOperand(1)})) {
6785  SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6786  AddToWorklist(IOR.getNode());
6787  return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6788  }
6789  }
6790 
6791  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6792  return Combined;
6793  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6794  return Combined;
6795 
6796  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6797  if (N0.getOpcode() == N1.getOpcode())
6798  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6799  return V;
6800 
6801  // See if this is some rotate idiom.
6802  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6803  return Rot;
6804 
6805  if (SDValue Load = MatchLoadCombine(N))
6806  return Load;
6807 
6808  // Simplify the operands using demanded-bits information.
6809  if (SimplifyDemandedBits(SDValue(N, 0)))
6810  return SDValue(N, 0);
6811 
6812  // If OR can be rewritten into ADD, try combines based on ADD.
6813  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6814  DAG.haveNoCommonBitsSet(N0, N1))
6815  if (SDValue Combined = visitADDLike(N))
6816  return Combined;
6817 
6818  return SDValue();
6819 }
6820 
6822  if (Op.getOpcode() == ISD::AND &&
6823  DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6824  Mask = Op.getOperand(1);
6825  return Op.getOperand(0);
6826  }
6827  return Op;
6828 }
6829 
6830 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6831 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6832  SDValue &Mask) {
6833  Op = stripConstantMask(DAG, Op, Mask);
6834  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6835  Shift = Op;
6836  return true;
6837  }
6838  return false;
6839 }
6840 
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv. This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
/// (or (add v v) (shrl v bitwidth-1)):
///   expands (add v v) -> (shl v 1)
///
/// (or (mul v c0) (shrl (mul v c1) c2)):
///   expands (mul v c0) -> (shl (mul v c1) c3)
///
/// (or (udiv v c0) (shl (udiv v c1) c2)):
///   expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
/// (or (shl v c0) (shrl (shl v c1) c2)):
///   expands (shl v c0) -> (shl (shl v c1) c3)
///
/// (or (shrl v c0) (shl (shrl v c1) c2)):
///   expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
// NOTE(review): the opening signature line (extractShiftForRotate, taking
// SelectionDAG &DAG and the existing half SDValue OppShift) was lost in this
// dump; the remaining parameters continue below.
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  // Peel off an optional constant AND mask; it is returned to the caller via
  // Mask so it can be re-applied around any rotate that is eventually formed.
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // Value and Type of the shift.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));

  // (add v v) -> (shl v 1)
  // TODO: Should this be a general DAG canonicalization?
  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
      ExtractFrom.getOpcode() == ISD::ADD &&
      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
      ExtractFrom.getOperand(0) == OppShiftLHS &&
      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));

  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these values
  // Check that we have constant values.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
6971 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// The IsRotate flag should be set when the LHS of both shifts is the same.
// Otherwise if matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG, bool IsRotate) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                      [B]
  //
  // for all Neg and Pos. Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time. E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  //
  // NOTE: We can only do this when matching an AND and not a general
  // funnel shift.
  // MaskLoBits is nonzero only after an (and X, EltSize-1) mask has been
  // stripped from Neg below; it then records Log2(EltSize).
  unsigned MaskLoBits = 0;
  if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
      unsigned Bits = Log2_64(EltSize);
      // The AND is redundant w.r.t. the low Bits if the mask (combined with
      // known-zero bits) covers at least the low Bits bits.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  // NOTE(review): the line declaring NegC (the constant-or-splat taken from
  // Neg's first operand) was lost in this dump.
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'. The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // We also need to account for a potential truncation of NegOp1 if the amount
  // has already been legalized to a shift amount type.
  APInt Width;
  if ((Pos == NegOp1) ||
      (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //     NegC & Mask == (EltSize - PosC) & Mask
  //     EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
7093 
7094 // A subroutine of MatchRotate used once we have found an OR of two opposite
7095 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
7096 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7097 // former being preferred if supported. InnerPos and InnerNeg are Pos and
7098 // Neg with outer conversions stripped away.
7099 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7100  SDValue Neg, SDValue InnerPos,
7101  SDValue InnerNeg, unsigned PosOpcode,
7102  unsigned NegOpcode, const SDLoc &DL) {
7103  // fold (or (shl x, (*ext y)),
7104  // (srl x, (*ext (sub 32, y)))) ->
7105  // (rotl x, y) or (rotr x, (sub 32, y))
7106  //
7107  // fold (or (shl x, (*ext (sub 32, y))),
7108  // (srl x, (*ext y))) ->
7109  // (rotr x, y) or (rotl x, (sub 32, y))
7110  EVT VT = Shifted.getValueType();
7111  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
7112  /*IsRotate*/ true)) {
7113  bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
7114  return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7115  HasPos ? Pos : Neg);
7116  }
7117 
7118  return SDValue();
7119 }
7120 
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, const SDLoc &DL) {
  EVT VT = N0.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // fold (or (shl x0, (*ext y)),
  //          (srl x1, (*ext (sub 32, y)))) ->
  //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
  //
  // fold (or (shl x0, (*ext (sub 32, y))),
  //          (srl x1, (*ext y))) ->
  //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
  // Treat as a rotate only when both shifted operands are the same value.
  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                       HasPos ? Pos : Neg);
  }

  // Matching the shift+xor cases, we can't easily use the xor'd shift amount
  // so for now just use the PosOpcode case if its legal.
  // TODO: When can we use the NegOpcode case?
  if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
    // Returns true iff Op is (BinOpc _, Imm) with a constant/splat RHS.
    auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
      if (Op.getOpcode() != BinOpc)
        return false;
      ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
      return Cst && (Cst->getAPIntValue() == Imm);
    };

    // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
    //   -> (fshl x0, x1, y)
    if (IsBinOpImm(N1, ISD::SRL, 1) &&
        IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
        InnerPos == InnerNeg.getOperand(0) &&
        // NOTE(review): the final clause of this condition was lost in this
        // dump.
      return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
    }

    // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    if (IsBinOpImm(N0, ISD::SHL, 1) &&
        IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
        InnerNeg == InnerPos.getOperand(0) &&
        // NOTE(review): the final clause of this condition was lost in this
        // dump.
      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
    }

    // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
    if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
        IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
        InnerNeg == InnerPos.getOperand(0) &&
        // NOTE(review): the final clause of this condition was lost in this
        // dump.
      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
    }
  }

  return SDValue();
}
7189 
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  EVT VT = LHS.getValueType();

  // The target must have at least one rotate/funnel flavor.
  // We still try to match rotate by constant pre-legalization.
  // TODO: Support pre-legalization funnel-shift by constant.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  bool HasFSHL = hasOperation(ISD::FSHL, VT);
  bool HasFSHR = hasOperation(ISD::FSHR, VT);
  if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // Check for truncated rotate: recurse on the wide sources and truncate the
  // resulting rotate back down.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return SDValue();

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract. We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return SDValue();

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return SDValue(); // Shifts must disagree.

  // TODO: Support pre-legalization funnel-shift by constant.
  bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
  if (!IsRotate && !(HasFSHL || HasFSHR))
    return SDValue(); // Requires funnel shift support.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
  // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
  // iff C1+C2 == EltSizeInBits
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Res;
    if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
      // Prefer rotate for a true rotate pattern; fall back to ROTR with the
      // complementary amount if only that flavor is legal.
      bool UseROTL = !LegalOperations || HasROTL;
      Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                        UseROTL ? LHSShiftAmt : RHSShiftAmt);
    } else {
      bool UseFSHL = !LegalOperations || HasFSHL;
      Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
                        RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
    }

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        // Bits contributed by the right shift are unaffected by LHSMask.
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        // Bits contributed by the left shift are unaffected by RHSMask.
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
    }

    return Res;
  }

  // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
  // shift.
  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return SDValue();

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try matching a rotate first (both shifted values are the same), in both
  // pos/neg orientations.
  if (IsRotate && (HasROTL || HasROTR)) {
    SDValue TryL =
        MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
                          RExtOp0, ISD::ROTL, ISD::ROTR, DL);
    if (TryL)
      return TryL;

    SDValue TryR =
        MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
                          LExtOp0, ISD::ROTR, ISD::ROTL, DL);
    if (TryR)
      return TryR;
  }

  // Otherwise try a funnel shift, again in both orientations.
  SDValue TryL =
      MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                        LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
  if (TryL)
    return TryL;

  SDValue TryR =
      MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                        RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
  if (TryR)
    return TryR;

  return SDValue();
}
7371 
7372 namespace {
7373 
7374 /// Represents known origin of an individual byte in load combine pattern. The
7375 /// value of the byte is either constant zero or comes from memory.
7376 struct ByteProvider {
7377  // For constant zero providers Load is set to nullptr. For memory providers
7378  // Load represents the node which loads the byte from memory.
7379  // ByteOffset is the offset of the byte in the value produced by the load.
7380  LoadSDNode *Load = nullptr;
7381  unsigned ByteOffset = 0;
7382 
7383  ByteProvider() = default;
7384 
7385  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7386  return ByteProvider(Load, ByteOffset);
7387  }
7388 
7389  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7390 
7391  bool isConstantZero() const { return !Load; }
7392  bool isMemory() const { return Load; }
7393 
7394  bool operator==(const ByteProvider &Other) const {
7395  return Other.Load == Load && Other.ByteOffset == ByteOffset;
7396  }
7397 
7398 private:
7399  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7400  : Load(Load), ByteOffset(ByteOffset) {}
7401 };
7402 
7403 } // end anonymous namespace
7404 
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
static const Optional<ByteProvider>
// NOTE(review): the line declaring the function name and the Op/Index/Depth
// parameters was lost in this dump; only the defaulted Root parameter remains
// visible below.
                        bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  // Only the expression root may have multiple uses; see the function comment.
  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR is known only if, on one side, that byte is zero.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    // Only whole-byte shifts can be mapped to a byte re-indexing.
    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    // Low ByteShift bytes are zero-filled; the rest come from the source,
    // shifted down by ByteShift positions.
    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes beyond the narrow value are known-zero only for zext; sext/aext
    // high bytes are not a usable provider.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // Byte swap mirrors the byte index.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    // Only simple (non-atomic, non-volatile), non-indexed loads qualify.
    if (!L->isSimple() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes past the in-memory width are zero only for a zero-extending load.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}
7502 
/// Memory offset of byte \p i of a \p BW-byte little-endian value: for little
/// endian the i-th byte is simply stored at offset i.
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  // Width is irrelevant for little endian; the parameter is kept for symmetry
  // with bigEndianByteAt.
  (void)BW;
  return i;
}
7506 
/// Memory offset of byte \p i of a \p BW-byte big-endian value: byte order is
/// reversed, so byte i lands at offset BW - 1 - i.
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  const unsigned LastOffset = BW - 1;
  return LastOffset - i;
}
7510 
// Check if the bytes offsets we are looking at match with either big or
// little endian value loaded. Return true for big endian, false for little
// endian, and None if match failed.
// NOTE(review): the opening signature line (isBigEndian, taking the
// ByteOffsets array) was lost in this dump; the remaining parameter continues
// below.
                       int64_t FirstOffset) {
  // The endian can be decided only when it is 2 bytes at least.
  unsigned Width = ByteOffsets.size();
  if (Width < 2)
    return None;

  // Compare every offset (relative to the smallest one) against both byte
  // orders simultaneously; bail as soon as neither can match.
  bool BigEndian = true, LittleEndian = true;
  for (unsigned i = 0; i < Width; i++) {
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
    BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
    if (!BigEndian && !LittleEndian)
      return None;
  }

  // NOTE(review): the two string pieces concatenate without a space
  // ("...big endian orlittle endian") — message-only cosmetic bug.
  assert((BigEndian != LittleEndian) && "It should be either big endian or"
                                        "little endian");
  return BigEndian;
}
7534 
  // NOTE(review): the enclosing function's signature line (stripTruncAndExt,
  // taking SDValue Value) was lost in this dump. The body recursively peels
  // any chain of truncate/extend nodes and returns the innermost value.
  switch (Value.getOpcode()) {
  case ISD::TRUNCATE:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
    // Recurse through the conversion to reach the underlying source value.
    return stripTruncAndExt(Value.getOperand(0));
  }
  return Value;
}
7545 
7546 /// Match a pattern where a wide type scalar value is stored by several narrow
7547 /// stores. Fold it into a single store or a BSWAP and a store if the targets
7548 /// supports it.
7549 ///
7550 /// Assuming little endian target:
7551 /// i8 *p = ...
7552 /// i32 val = ...
7553 /// p[0] = (val >> 0) & 0xFF;
7554 /// p[1] = (val >> 8) & 0xFF;
7555 /// p[2] = (val >> 16) & 0xFF;
7556 /// p[3] = (val >> 24) & 0xFF;
7557 /// =>
7558 /// *((i32)p) = val;
7559 ///
7560 /// i8 *p = ...
7561 /// i32 val = ...
7562 /// p[0] = (val >> 24) & 0xFF;
7563 /// p[1] = (val >> 16) & 0xFF;
7564 /// p[2] = (val >> 8) & 0xFF;
7565 /// p[3] = (val >> 0) & 0xFF;
7566 /// =>
7567 /// *((i32)p) = BSWAP(val);
7568 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7569  // The matching looks for "store (trunc x)" patterns that appear early but are
7570  // likely to be replaced by truncating store nodes during combining.
7571  // TODO: If there is evidence that running this later would help, this
7572  // limitation could be removed. Legality checks may need to be added
7573  // for the created store and optional bswap/rotate.
7574  if (LegalOperations || OptLevel == CodeGenOpt::None)
7575  return SDValue();
7576 
7577  // We only handle merging simple stores of 1-4 bytes.
7578  // TODO: Allow unordered atomics when wider type is legal (see D66309)
7579  EVT MemVT = N->getMemoryVT();
7580  if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7581  !N->isSimple() || N->isIndexed())
7582  return SDValue();
7583 
7584  // Collect all of the stores in the chain.
7585  SDValue Chain = N->getChain();
7586  SmallVector<StoreSDNode *, 8> Stores = {N};
7587  while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7588  // All stores must be the same size to ensure that we are writing all of the
7589  // bytes in the wide value.
7590  // TODO: We could allow multiple sizes by tracking each stored byte.
7591  if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7592  Store->isIndexed())
7593  return SDValue();
7594  Stores.push_back(Store);
7595  Chain = Store->getChain();
7596  }
7597  // There is no reason to continue if we do not have at least a pair of stores.
7598  if (Stores.size() < 2)
7599  return SDValue();
7600 
7601  // Handle simple types only.
7602  LLVMContext &Context = *DAG.getContext();
7603  unsigned NumStores = Stores.size();
7604  unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7605  unsigned WideNumBits = NumStores * NarrowNumBits;
7606  EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7607  if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7608  return SDValue();
7609 
7610  // Check if all bytes of the source value that we are looking at are stored
7611  // to the same base address. Collect offsets from Base address into OffsetMap.
7612  SDValue SourceValue;
7613  SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7614  int64_t FirstOffset = INT64_MAX;
7615  StoreSDNode *FirstStore = nullptr;
7617  for (auto Store : Stores) {
7618  // All the stores store different parts of the CombinedValue. A truncate is
7619  // required to get the partial value.
7620  SDValue Trunc = Store->getValue();
7621  if (Trunc.getOpcode() != ISD::TRUNCATE)
7622  return SDValue();
7623  // Other than the first/last part, a shift operation is required to get the
7624  // offset.
7625  int64_t Offset = 0;
7626  SDValue WideVal = Trunc.getOperand(0);
7627  if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7628  isa<ConstantSDNode>(WideVal.getOperand(1))) {
7629  // The shift amount must be a constant multiple of the narrow type.
7630  // It is translated to the offset address in the wide source value "y".
7631  //
7632  // x = srl y, ShiftAmtC
7633  // i8 z = trunc x
7634  // store z, ...
7635  uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7636  if (ShiftAmtC % NarrowNumBits != 0)
7637  return SDValue();
7638 
7639  Offset = ShiftAmtC / NarrowNumBits;
7640  WideVal = WideVal.getOperand(0);
7641  }
7642 
7643  // Stores must share the same source value with different offsets.
7644  // Truncate and extends should be stripped to get the single source value.
7645  if (!SourceValue)
7646  SourceValue = WideVal;
7647  else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7648  return SDValue();
7649  else if (SourceValue.getValueType() != WideVT) {
7650  if (WideVal.getValueType() == WideVT ||
7651  WideVal.getScalarValueSizeInBits() >
7652  SourceValue.getScalarValueSizeInBits())
7653  SourceValue = WideVal;
7654  // Give up if the source value type is smaller than the store size.
7655  if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7656  return SDValue();
7657  }
7658 
7659  // Stores must share the same base address.
7661  int64_t ByteOffsetFromBase = 0;
7662  if (!Base)
7663  Base = Ptr;
7664  else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7665  return SDValue();
7666 
7667  // Remember the first store.
7668  if (ByteOffsetFromBase < FirstOffset) {
7669  FirstStore = Store;
7670  FirstOffset = ByteOffsetFromBase;
7671  }
7672  // Map the offset in the store and the offset in the combined value, and
7673  // early return if it has been set before.
7674  if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7675  return SDValue();
7676  OffsetMap[Offset] = ByteOffsetFromBase;
7677  }
7678 
7679  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7680  assert(FirstStore && "First store must be set");
7681 
7682  // Check that a store of the wide type is both allowed and fast on the target
7683  const DataLayout &Layout = DAG.getDataLayout();
7684  bool Fast = false;
7685  bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7686  *FirstStore->getMemOperand(), &Fast);
7687  if (!Allowed || !Fast)
7688  return SDValue();
7689 
7690  // Check if the pieces of the value are going to the expected places in memory
7691  // to merge the stores.
7692  auto checkOffsets = [&](bool MatchLittleEndian) {
7693  if (MatchLittleEndian) {
7694  for (unsigned i = 0; i != NumStores; ++i)
7695  if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7696  return false;
7697  } else { // MatchBigEndian by reversing loop counter.
7698  for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7699  if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7700  return false;
7701  }
7702  return true;
7703  };
7704 
7705  // Check if the offsets line up for the native data layout of this target.
7706  bool NeedBswap = false;
7707  bool NeedRotate = false;
7708  if (!checkOffsets(Layout.isLittleEndian())) {
7709  // Special-case: check if byte offsets line up for the opposite endian.
7710  if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7711  NeedBswap = true;
7712  else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7713  NeedRotate = true;
7714  else
7715  return SDValue();
7716  }
7717 
7718  SDLoc DL(N);
7719  if (WideVT != SourceValue.getValueType()) {
7720  assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7721  "Unexpected store value to merge");
7722  SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7723  }
7724 
7725  // Before legalize we can introduce illegal bswaps/rotates which will be later
7726  // converted to an explicit bswap sequence. This way we end up with a single
7727  // store and byte shuffling instead of several stores and byte shuffling.
7728  if (NeedBswap) {
7729  SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7730  } else if (NeedRotate) {
7731  assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7732  SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7733  SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7734  }
7735 
7736  SDValue NewStore =
7737  DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7738  FirstStore->getPointerInfo(), FirstStore->getAlign());
7739 
7740  // Rely on other DAG combine rules to remove the other individual stores.
7741  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7742  return NewStore;
7743 }
7744 
7745 /// Match a pattern where a wide type scalar value is loaded by several narrow
7746 /// loads and combined by shifts and ors. Fold it into a single load or a load
7747 /// and a BSWAP if the targets supports it.
7748 ///
7749 /// Assuming little endian target:
7750 /// i8 *a = ...
7751 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7752 /// =>
7753 /// i32 val = *((i32)a)
7754 ///
7755 /// i8 *a = ...
7756 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7757 /// =>
7758 /// i32 val = BSWAP(*((i32)a))
7759 ///
7760 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7761 /// interact well with the worklist mechanism. When a part of the pattern is
7762 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7763 /// but the root node of the pattern which triggers the load combine is not
7764 /// necessarily a direct user of the changed node. For example, once the address
7765 /// of t28 load is reassociated load combine won't be triggered:
7766 /// t25: i32 = add t4, Constant:i32<2>
7767 /// t26: i64 = sign_extend t25
7768 /// t27: i64 = add t2, t26
7769 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7770 /// t29: i32 = zero_extend t28
7771 /// t32: i32 = shl t29, Constant:i8<8>
7772 /// t33: i32 = or t23, t32
7773 /// As a possible fix visitLoad can check if the load can be a part of a load
7774 /// combine pattern and add corresponding OR roots to the worklist.
7775 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7776  assert(N->getOpcode() == ISD::OR &&
7777  "Can only match load combining against OR nodes");
7778 
7779  // Handles simple types only
7780  EVT VT = N->getValueType(0);
7781  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7782  return SDValue();
7783  unsigned ByteWidth = VT.getSizeInBits() / 8;
7784 
     // Translate a byte provider's position inside its (possibly multi-byte)
     // load into that byte's offset in memory, honoring target endianness.
7785  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7786  auto MemoryByteOffset = [&] (ByteProvider P) {
7787  assert(P.isMemory() && "Must be a memory byte provider");
7788  unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7789  assert(LoadBitWidth % 8 == 0 &&
7790  "can only analyze providers for individual bytes not bit");
7791  unsigned LoadByteWidth = LoadBitWidth / 8;
7792  return IsBigEndianTarget
7793  ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7794  : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7795  };
7796 
     // NOTE(review): this extraction appears to have dropped original lines
     // 7797 and 7800 here — presumably the declarations of the load set
     // (`Loads`, inserted into below) and of the base address (`Base`, an
     // Optional<BaseIndexOffset> compared below). Verify against upstream
     // DAGCombiner.cpp before relying on this listing.
7798  SDValue Chain;
7799 
7801  Optional<ByteProvider> FirstByteProvider;
7802  int64_t FirstOffset = INT64_MAX;
7803 
7804  // Check if all the bytes of the OR we are looking at are loaded from the same
7805  // base address. Collect bytes offsets from Base address in ByteOffsets.
7806  SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7807  unsigned ZeroExtendedBytes = 0;
     // Walk the result bytes from most-significant (i == ByteWidth - 1) down
     // to least-significant, asking calculateByteProvider where each one
     // comes from.
7808  for (int i = ByteWidth - 1; i >= 0; --i) {
7809  auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7810  if (!P)
7811  return SDValue();
7812 
7813  if (P->isConstantZero()) {
7814  // It's OK for the N most significant bytes to be 0, we can just
7815  // zero-extend the load.
7816  if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7817  return SDValue();
7818  continue;
7819  }
7820  assert(P->isMemory() && "provenance should either be memory or zero");
7821 
7822  LoadSDNode *L = P->Load;
7823  assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7824  !L->isIndexed() &&
7825  "Must be enforced by calculateByteProvider");
7826  assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7827 
7828  // All loads must share the same chain
7829  SDValue LChain = L->getChain();
7830  if (!Chain)
7831  Chain = LChain;
7832  else if (Chain != LChain)
7833  return SDValue();
7834 
7835  // Loads must share the same base address
     // NOTE(review): original line 7836 appears to be missing from this
     // extraction — presumably the declaration of `Ptr`
     // (BaseIndexOffset::match(L, DAG)) used in the comparison below.
7837  int64_t ByteOffsetFromBase = 0;
7838  if (!Base)
7839  Base = Ptr;
7840  else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7841  return SDValue();
7842 
7843  // Calculate the offset of the current byte from the base address
7844  ByteOffsetFromBase += MemoryByteOffset(*P);
7845  ByteOffsets[i] = ByteOffsetFromBase;
7846 
7847  // Remember the first byte load
7848  if (ByteOffsetFromBase < FirstOffset) {
7849  FirstByteProvider = P;
7850  FirstOffset = ByteOffsetFromBase;
7851  }
7852 
7853  Loads.insert(L);
7854  }
7855  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7856  "memory, so there must be at least one load which produces the value");
7857  assert(Base && "Base address of the accessed memory location must be set");
7858  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7859 
7860  bool NeedsZext = ZeroExtendedBytes > 0;
7861 
     // The memory type covers only the non-zero bytes; if the high bytes were
     // constant zero we load the narrow value and zero-extend it.
7862  EVT MemVT =
7863  EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7864 
7865  if (!MemVT.isSimple())
7866  return SDValue();
7867 
7868  // Before legalize we can introduce too wide illegal loads which will be later
7869  // split into legal sized loads. This enables us to combine i64 load by i8
7870  // patterns to a couple of i32 loads on 32 bit targets.
7871  if (LegalOperations &&
7872  !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7873  MemVT))
7874  return SDValue();
7875 
7876  // Check if the bytes of the OR we are looking at match with either big or
7877  // little endian value load
7878  Optional<bool> IsBigEndian = isBigEndian(
7879  makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7880  if (!IsBigEndian.hasValue())
7881  return SDValue();
7882 
7883  assert(FirstByteProvider && "must be set");
7884 
7885  // Ensure that the first byte is loaded from zero offset of the first load.
7886  // So the combined value can be loaded from the first load address.
7887  if (MemoryByteOffset(*FirstByteProvider) != 0)
7888  return SDValue();
7889  LoadSDNode *FirstLoad = FirstByteProvider->Load;
7890 
7891  // The node we are looking at matches with the pattern, check if we can
7892  // replace it with a single (possibly zero-extended) load and bswap + shift if
7893  // needed.
7894 
7895  // If the load needs byte swap check if the target supports it
7896  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7897 
7898  // Before legalize we can introduce illegal bswaps which will be later
7899  // converted to an explicit bswap sequence. This way we end up with a single
7900  // load and byte shuffling instead of several loads and byte shuffling.
7901  // We do not introduce illegal bswaps when zero-extending as this tends to
7902  // introduce too many arithmetic instructions.
7903  if (NeedsBswap && (LegalOperations || NeedsZext) &&
7904  !TLI.isOperationLegal(ISD::BSWAP, VT))
7905  return SDValue();
7906 
7907  // If we need to bswap and zero extend, we have to insert a shift. Check that
7908  // it is legal.
7909  if (NeedsBswap && NeedsZext && LegalOperations &&
7910  !TLI.isOperationLegal(ISD::SHL, VT))
7911  return SDValue();
7912 
7913  // Check that a load of the wide type is both allowed and fast on the target
7914  bool Fast = false;
7915  bool Allowed =
7916  TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7917  *FirstLoad->getMemOperand(), &Fast);
7918  if (!Allowed || !Fast)
7919  return SDValue();
7920 
7921  SDValue NewLoad =
7922  DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7923  Chain, FirstLoad->getBasePtr(),
7924  FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7925 
7926  // Transfer chain users from old loads to the new load.
7927  for (LoadSDNode *L : Loads)
7928  DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7929 
7930  if (!NeedsBswap)
7931  return NewLoad;
7932 
     // When byte-swapping a zero-extended value, shift the loaded bytes up to
     // the top first so the bswap lands them in the low (correct) positions.
7933  SDValue ShiftedLoad =
7934  NeedsZext
7935  ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7936  DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7937  SDLoc(N), LegalOperations))
7938  : NewLoad;
7939  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7940 }
7941 
7942 // If the target has andn, bsl, or a similar bit-select instruction,
7943 // we want to unfold masked merge, with canonical pattern of:
7944 // | A | |B|
7945 // ((x ^ y) & m) ^ y
7946 // | D |
7947 // Into:
7948 // (x & m) | (y & ~m)
7949 // If y is a constant, m is not a 'not', and the 'andn' does not work with
7950 // immediates, we unfold into a different pattern:
7951 // ~(~x & m) & (m | y)
7952 // If x is a constant, m is a 'not', and the 'andn' does not work with
7953 // immediates, we unfold into a different pattern:
7954 // (x | ~m) & ~(~m & ~y)
7955 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7956 // the very least that breaks andnpd / andnps patterns, and because those
7957 // patterns are simplified in IR and shouldn't be created in the DAG
7958 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7959  assert(N->getOpcode() == ISD::XOR);
7960 
7961  // Don't touch 'not' (i.e. where y = -1).
7962  if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7963  return SDValue();
7964 
7965  EVT VT = N->getValueType(0);
7966 
7967  // There are 3 commutable operators in the pattern,
7968  // so we have to deal with 8 possible variants of the basic pattern.
     // matchAndXor tries to match `And` as ((x ^ y) & m) where `Other` (the
     // other operand of the root xor) is the same node as y. XorIdx selects
     // which AND operand is expected to be the inner xor; on success the
     // captured x, y and mask m are written to the enclosing X, Y, M.
7969  SDValue X, Y, M;
7970  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7971  if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7972  return false;
7973  SDValue Xor = And.getOperand(XorIdx);
7974  if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7975  return false;
7976  SDValue Xor0 = Xor.getOperand(0);
7977  SDValue Xor1 = Xor.getOperand(1);
7978  // Don't touch 'not' (i.e. where y = -1).
7979  if (isAllOnesOrAllOnesSplat(Xor1))
7980  return false;
     // The inner xor is commutative: normalize so Xor1 is the operand that
     // must equal Other (y).
7981  if (Other == Xor0)
7982  std::swap(Xor0, Xor1);
7983  if (Other != Xor1)
7984  return false;
7985  X = Xor0;
7986  Y = Xor1;
7987  M = And.getOperand(XorIdx ? 0 : 1);
7988  return true;
7989  };
7990 
     // Try the AND on either side of the root xor, and the inner xor in
     // either AND operand position — 4 calls x 2-way inner swap = 8 variants.
7991  SDValue N0 = N->getOperand(0);
7992  SDValue N1 = N->getOperand(1);
7993  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7994  !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7995  return SDValue();
7996 
7997  // Don't do anything if the mask is constant. This should not be reachable.
7998  // InstCombine should have already unfolded this pattern, and DAGCombiner
7999  // probably shouldn't produce it, too.
8000  if (isa<ConstantSDNode>(M.getNode()))
8001  return SDValue();
8002 
8003  // We can transform if the target has AndNot
8004  if (!TLI.hasAndNot(M))
8005  return SDValue();
8006 
8007  SDLoc DL(N);
8008 
8009  // If Y is a constant, check that 'andn' works with immediates. Unless M is
8010  // a bitwise not that would already allow ANDN to be used.
8011  if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
8012  assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
8013  // If not, we need to do a bit more work to make sure andn is still used.
     // Emit the equivalent form ~(~x & m) & (m | y).
8014  SDValue NotX = DAG.getNOT(DL, X, VT);
8015  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
8016  SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
8017  SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
8018  return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
8019  }
8020 
8021  // If X is a constant and M is a bitwise not, check that 'andn' works with
8022  // immediates.
8023  if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
8024  assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
8025  // If not, we need to do a bit more work to make sure andn is still used.
     // Emit the equivalent form (x | ~m) & ~(~m & ~y).
8026  SDValue NotM = M.getOperand(0);
8027  SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
8028  SDValue NotY = DAG.getNOT(DL, Y, VT);
8029  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
8030  SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
8031  return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
8032  }
8033 
     // General case: (x & m) | (y & ~m).
8034  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
8035  SDValue NotM = DAG.getNOT(DL, M, VT);
8036  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
8037 
8038  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
8039 }
8040 
// Try the XOR-specific folds below on node N; returns the replacement value
// if a fold applied, or an empty SDValue otherwise.
8041 SDValue DAGCombiner::visitXOR(SDNode *N) {
8042  SDValue N0 = N->getOperand(0);
8043  SDValue N1 = N->getOperand(1);
8044  EVT VT = N0.getValueType();
8045  SDLoc DL(N);
8046 
8047  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
8048  if (N0.isUndef() && N1.isUndef())
8049  return DAG.getConstant(0, DL, VT);
8050 
8051  // fold (xor x, undef) -> undef
8052  if (N0.isUndef())
8053  return N0;
8054  if (N1.isUndef())
8055  return N1;
8056 
8057  // fold (xor c1, c2) -> c1^c2
8058  if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
8059  return C;
8060 
8061  // canonicalize constant to RHS
     // NOTE(review): original lines 8062-8063 appear to be missing from this
     // extraction — presumably the `if` guard that checks N0 is a constant
     // and N1 is not before swapping the operands. Verify against upstream.
8064  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
8065 
8066  // fold vector ops
8067  if (VT.isVector()) {
8068  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8069  return FoldedVOp;
8070 
8071  // fold (xor x, 0) -> x, vector edition
     // NOTE(review): original line 8072 appears to be missing from this
     // extraction — presumably the guard testing that N1 is an all-zeros
     // splat before returning N0. Verify against upstream.
8073  return N0;
8074  }
8075 
8076  // fold (xor x, 0) -> x
8077  if (isNullConstant(N1))
8078  return N0;
8079 
8080  if (SDValue NewSel = foldBinOpIntoSelect(N))
8081  return NewSel;
8082 
8083  // reassociate xor
8084  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8085  return RXOR;
8086 
8087  // fold !(x cc y) -> (x !cc y)
8088  unsigned N0Opcode = N0.getOpcode();
8089  SDValue LHS, RHS, CC;
8090  if (TLI.isConstTrueVal(N1) &&
8091  isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
8092  ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8093  LHS.getValueType());
8094  if (!LegalOperations ||
8095  TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8096  switch (N0Opcode) {
8097  default:
8098  llvm_unreachable("Unhandled SetCC Equivalent!");
8099  case ISD::SETCC:
8100  return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8101  case ISD::SELECT_CC:
8102  return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8103  N0.getOperand(3), NotCC);
8104  case ISD::STRICT_FSETCC:
8105  case ISD::STRICT_FSETCCS: {
8106  if (N0.hasOneUse()) {
8107  // FIXME Can we handle multiple uses? Could we token factor the chain
8108  // results from the new/old setcc?
8109  SDValue SetCC =
8110  DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8111  N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8112  CombineTo(N, SetCC);
     // Strict FP setcc also produces a chain result; route the old chain
     // users to the new node before deleting the old one.
8113  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8114  recursivelyDeleteUnusedNodes(N0.getNode());
8115  return SDValue(N, 0); // Return N so it doesn't get rechecked!
8116  }
8117  break;
8118  }
8119  }
8120  }
8121  }
8122 
8123  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8124  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8125  isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8126  SDValue V = N0.getOperand(0);
8127  SDLoc DL0(N0);
8128  V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8129  DAG.getConstant(1, DL0, V.getValueType()));
8130  AddToWorklist(V.getNode());
8131  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8132  }
8133 
8134  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8135  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8136  (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8137  SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8138  if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
     // De Morgan: flip AND<->OR and invert both operands.
8139  unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8140  N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8141  N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8142  AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8143  return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8144  }
8145  }
8146  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8147  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8148  (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8149  SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8150  if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8151  unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8152  N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8153  N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8154  AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8155  return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8156  }
8157  }
8158 
8159  // fold (not (neg x)) -> (add X, -1)
8160  // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8161  // Y is a constant or the subtract has a single use.
8162  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8163  isNullConstant(N0.getOperand(0))) {
8164  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8165  DAG.getAllOnesConstant(DL, VT));
8166  }
8167 
8168  // fold (not (add X, -1)) -> (neg X)
8169  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
     // NOTE(review): original line 8170 appears to be missing from this
     // extraction — presumably the remainder of this condition, checking
     // that N0's second operand is an all-ones value. Verify upstream.
8171  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8172  N0.getOperand(0));
8173  }
8174 
8175  // fold (xor (and x, y), y) -> (and (not x), y)
8176  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8177  SDValue X = N0.getOperand(0);
8178  SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8179  AddToWorklist(NotX.getNode());
8180  return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8181  }
8182 
8183  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
8184  ConstantSDNode *XorC = isConstOrConstSplat(N1);
     // NOTE(review): original line 8185 appears to be missing from this
     // extraction — presumably the declaration of `ShiftC`
     // (isConstOrConstSplat of N0's shift-amount operand) tested below.
8186  unsigned BitWidth = VT.getScalarSizeInBits();
8187  if (XorC && ShiftC) {
8188  // Don't crash on an oversized shift. We can not guarantee that a bogus
8189  // shift has been simplified to undef.
8190  uint64_t ShiftAmt = ShiftC->getLimitedValue();
8191  if (ShiftAmt < BitWidth) {
     // NOTE(review): original line 8192 appears to be missing from this
     // extraction — presumably the declaration of `Ones` (an all-ones
     // APInt of BitWidth bits) shifted below.
8193  Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
8194  if (XorC->getAPIntValue() == Ones) {
8195  // If the xor constant is a shifted -1, do a 'not' before the shift:
8196  // xor (X << ShiftC), XorC --> (not X) << ShiftC
8197  // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
8198  SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
8199  return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
8200  }
8201  }
8202  }
8203  }
8204 
8205  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
8206  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8207  SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8208  SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8209  if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8210  SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8211  SDValue S0 = S.getOperand(0);
8212  if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
     // NOTE(review): original line 8213 appears to be missing from this
     // extraction — presumably the `if (ConstantSDNode *C = ...)` that
     // binds `C` to the sra shift amount used below. Verify upstream.
8214  if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8215  return DAG.getNode(ISD::ABS, DL, VT, S0);
8216  }
8217  }
8218 
8219  // fold (xor x, x) -> 0
8220  if (N0 == N1)
8221  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8222 
8223  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8224  // Here is a concrete example of this equivalence:
8225  // i16 x == 14
8226  // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
8227  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8228  //
8229  // =>
8230  //
8231  // i16 ~1 == 0b1111111111111110
8232  // i16 rol(~1, 14) == 0b1011111111111111
8233  //
8234  // Some additional tips to help conceptualize this transform:
8235  // - Try to see the operation as placing a single zero in a value of all ones.
8236  // - There exists no value for x which would allow the result to contain zero.
8237  // - Values of x larger than the bitwidth are undefined and do not require a
8238  // consistent result.
8239  // - Pushing the zero left requires shifting one bits in from the right.
8240  // A rotate left of ~1 is a nice way of achieving the desired result.
8241  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8242  isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8243  return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8244  N0.getOperand(1));
8245  }
8246 
8247  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
8248  if (N0Opcode == N1.getOpcode())
8249  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8250  return V;
8251 
8252  // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
8253  if (SDValue MM = unfoldMaskedMerge(N))
8254  return MM;
8255 
8256  // Simplify the expression using non-local knowledge.
8257  if (SimplifyDemandedBits(SDValue(N, 0)))
8258  return SDValue(N, 0);
8259 
8260  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8261  return Combined;
8262 
8263  return SDValue();
8264 }
8265 
8266 /// If we have a shift-by-constant of a bitwise logic op that itself has a
8267 /// shift-by-constant operand with identical opcode, we may be able to convert
8268 /// that into 2 independent shifts followed by the logic op. This is a
8269 /// throughput improvement.
// NOTE(review): the function signature line (original line 8270) appears to
// be missing from this extraction — per the doc comment above, this is the
// body of combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG).
// Verify against upstream DAGCombiner.cpp.
8271  // Match a one-use bitwise logic op.
8272  SDValue LogicOp = Shift->getOperand(0);
8273  if (!LogicOp.hasOneUse())
8274  return SDValue();
8275 
8276  unsigned LogicOpcode = LogicOp.getOpcode();
8277  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8278  LogicOpcode != ISD::XOR)
8279  return SDValue();
8280 
8281  // Find a matching one-use shift by constant.
8282  unsigned ShiftOpcode = Shift->getOpcode();
8283  SDValue C1 = Shift->getOperand(1);
8284  ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8285  assert(C1Node && "Expected a shift with constant operand");
8286  const APInt &C1Val = C1Node->getAPIntValue();
     // matchFirstShift: succeed iff V is a one-use shift of the same opcode
     // with a constant amount whose width matches C1's and whose sum with C1
     // stays inside the scalar bitwidth; captures the operand and amount.
8287  auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8288  const APInt *&ShiftAmtVal) {
8289  if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8290  return false;
8291 
8292  ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8293  if (!ShiftCNode)
8294  return false;
8295 
8296  // Capture the shifted operand and shift amount value.
8297  ShiftOp = V.getOperand(0);
8298  ShiftAmtVal = &ShiftCNode->getAPIntValue();
8299 
8300  // Shift amount types do not have to match their operand type, so check that
8301  // the constants are the same width.
8302  if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8303  return false;
8304 
8305  // The fold is not valid if the sum of the shift values exceeds bitwidth.
8306  if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8307  return false;
8308 
8309  return true;
8310  };
8311 
8312  // Logic ops are commutative, so check each operand for a match.
8313  SDValue X, Y;
8314  const APInt *C0Val;
8315  if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8316  Y = LogicOp.getOperand(1);
8317  else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8318  Y = LogicOp.getOperand(0);
8319  else
8320  return SDValue();
8321 
8322  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8323  SDLoc DL(Shift);
8324  EVT VT = Shift->getValueType(0);
8325  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8326  SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8327  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8328  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8329  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8330 }
8331 
8332 /// Handle transforms common to the three shifts, when the shift amount is a
8333 /// constant.
8334 /// We are looking for: (shift being one of shl/sra/srl)
8335 /// shift (binop X, C0), C1
8336 /// And want to transform into:
8337 /// binop (shift X, C1), (shift C0, C1)
8338 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8339  assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8340 
8341  // Do not turn a 'not' into a regular xor.
8342  if (isBitwiseNot(N->getOperand(0)))
8343  return SDValue();
8344 
8345  // The inner binop must be one-use, since we want to replace it.
8346  SDValue LHS = N->getOperand(0);
     // NOTE(review): original line 8347 appears to be missing from this
     // extraction — presumably the guard that bails out unless LHS has one
     // use (and the target considers the commute desirable), matching the
     // comment above. Verify against upstream DAGCombiner.cpp.
8348  return SDValue();
8349 
8350  // TODO: This is limited to early combining because it may reveal regressions
8351  // otherwise. But since we just checked a target hook to see if this is
8352  // desirable, that should have filtered out cases where this interferes
8353  // with some other pattern matching.
8354  if (!LegalTypes)
8355  if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8356  return R;
8357 
8358  // We want to pull some binops through shifts, so that we have (and (shift))
8359  // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8360  // thing happens with address calculations, so it's important to canonicalize
8361  // it.
8362  switch (LHS.getOpcode()) {
8363  default:
8364  return SDValue();
8365  case ISD::OR:
8366  case ISD::XOR:
8367  case ISD::AND:
8368  break;
8369  case ISD::ADD:
8370  if (N->getOpcode() != ISD::SHL)
8371  return SDValue(); // only shl(add) not sr[al](add).
8372  break;
8373  }
8374 
8375  // We require the RHS of the binop to be a constant and not opaque as well.
8376  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8377  if (!BinOpCst)
8378  return SDValue();
8379 
8380  // FIXME: disable this unless the input to the binop is a shift by a constant
8381  // or is copy/select. Enable this in other cases when figure out it's exactly
8382  // profitable.
8383  SDValue BinOpLHSVal = LHS.getOperand(0);
8384  bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8385  BinOpLHSVal.getOpcode() == ISD::SRA ||
8386  BinOpLHSVal.getOpcode() == ISD::SRL) &&
8387  isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8388  bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8389  BinOpLHSVal.getOpcode() == ISD::SELECT;
8390 
8391  if (!IsShiftByConstant && !IsCopyOrSelect)
8392  return SDValue();
8393 
8394  if (IsCopyOrSelect && N->hasOneUse())
8395  return SDValue();
8396 
8397  // Fold the constants, shifting the binop RHS by the shift amount.
8398  SDLoc DL(N);
8399  EVT VT = N->getValueType(0);
8400  SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8401  N->getOperand(1));
8402  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8403 
     // Rebuild as binop(shift X, C1), shifted-C0 — i.e. the shift is pushed
     // below the logic/add op.
8404  SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8405  N->getOperand(1));
8406  return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8407 }
8408 
8409 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8410  assert(N->getOpcode() == ISD::TRUNCATE);
8411  assert(N->getOperand(0).getOpcode() == ISD::AND);
8412 
8413  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8414  EVT TruncVT = N->getValueType(0);
8415  if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8416  TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8417  SDValue N01 = N->getOperand(0).getOperand(1);
8418  if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8419  SDLoc DL(N);
8420  SDValue N00 = N->getOperand(0).getOperand(0);
8421  SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8422  SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8423  AddToWorklist(Trunc00.getNode());
8424  AddToWorklist(Trunc01.getNode());
8425  return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8426  }
8427  }
8428 
8429  return SDValue();
8430 }
8431 
/// Combine step for ISD::ROTL / ISD::ROTR nodes. Tries the standard rotate
/// folds (zero amount, modulo amount, rot->bswap, demanded bits, truncated
/// AND amounts, rotate-of-rotate) and returns the replacement value, or an
/// empty SDValue if nothing applies.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);   // rotated value
  SDValue N1 = N->getOperand(1);   // rotate amount
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> x iff (c % BitSize) == 0
  // Bitsize must be a power of two so that (Bitsize - 1) is a valid modulo
  // mask for the amount's low bits.
  if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
    APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
    if (DAG.MaskedValueIsZero(N1, ModuloMask))
      return N0;
  }

  // fold (rot x, c) -> (rot x, c % BitSize)
  // MatchOutOfRange records (via OutOfRange) whether any constant element of
  // the amount is >= Bitsize; it always returns true so every element is
  // visited.
  bool OutOfRange = false;
  auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
    OutOfRange |= C->getAPIntValue().uge(Bitsize);
    return true;
  };
  if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
    EVT AmtVT = N1.getValueType();
    SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
    if (SDValue Amt =
            DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
  }

  // rot i16 X, 8 --> bswap X
  // A half-width rotate of a 16-bit value is exactly a byte swap.
  auto *RotAmtC = isConstOrConstSplat(N1);
  if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
      VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
    return DAG.getNode(ISD::BSWAP, dl, VT, N0);

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    // NOTE(review): the declarations of C1 and C2 (presumably constant-splat
    // queries on the two rotate amounts) are elided in this excerpt — confirm
    // against the full source before editing this region.
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Same-direction rotates add their amounts; opposite directions
      // subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
              CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
8502 
/// Combine step for ISD::SHL nodes. Applies constant folding, vector-op
/// simplification, shift-of-shift / shift-of-extend merging, shift-through-
/// binop canonicalizations, and VSCALE/STEP_VECTOR folds; returns the
/// replacement value, or an empty SDValue if nothing applies.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);   // value being shifted
  SDValue N1 = N->getOperand(1);   // shift amount
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  EVT ShiftVT = N1.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold (shl c1, c2) -> c1<<c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        // NOTE(review): the right-hand side of this boolean-contents
        // comparison (and the opening brace of its block) is elided in this
        // excerpt — confirm against the full source.
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
          if (SDValue C =
                  DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // Sum the two constant amounts in a widened APInt (one extra bit) so the
    // addition itself cannot overflow before the range check.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form. This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);
    EVT InnerVT = N0Op0.getValueType();
    uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();

    // c2 >= (OpSizeInBits - InnerBitwidth) guarantees the outer shift covers
    // the bits added by the extension (see the comment above).
    auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                         ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                      ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
      SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
      Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
      return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);

    // Both shift amounts must be the same constant C, in range for VT.
    auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2);
      return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
      SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
      NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
      AddToWorklist(NewSHL.getNode());
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    }
  }

  // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
  // TODO - support non-uniform vector shift amounts.
  // NOTE(review): the declaration of N1C (presumably a constant-splat query
  // on N1) is elided in this excerpt.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, ShiftVT));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, ShiftVT));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  // TODO - drop hasOneUse requirement if c1 == c2?
  // TODO - support non-uniform vector shift amounts.
  // NOTE(review): the continuation of this condition (a target-hook check) is
  // elided in this excerpt.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
        uint64_t c1 = N0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits of x that survive the inner srl.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, ShiftVT));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, ShiftVT));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  // NOTE(review): the continuation of this condition (a target-hook check) is
  // elided in this excerpt.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted constant folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N))
      return NewSHL;

  // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
  if (N0.getOpcode() == ISD::VSCALE)
    if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
      const APInt &C0 = N0.getConstantOperandAPInt(0);
      const APInt &C1 = NC1->getAPIntValue();
      return DAG.getVScale(SDLoc(N), VT, C0 << C1);
    }

  // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
  APInt ShlVal;
  if (N0.getOpcode() == ISD::STEP_VECTOR)
    if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
      const APInt &C0 = N0.getConstantOperandAPInt(0);
      if (ShlVal.ult(C0.getBitWidth())) {
        APInt NewStep = C0 << ShlVal;
        return DAG.getStepVector(SDLoc(N), VT, NewStep);
      }
    }

  return SDValue();
}
8766 
// Transform a right shift of a multiply into a multiply-high.
// Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
// NOTE(review): the opening of this function's signature (its name and
// leading parameters, presumably the SRL/SRA node and the SelectionDAG) is
// elided in this excerpt; only the trailing parameter is visible below.
                             const TargetLowering &TLI) {
  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");

  // Check the shift amount. Proceed with the transformation if the shift
  // amount is constant.
  ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
  if (!ShiftAmtSrc)
    return SDValue();

  SDLoc DL(N);

  // The operation feeding into the shift must be a multiply.
  SDValue ShiftOperand = N->getOperand(0);
  if (ShiftOperand.getOpcode() != ISD::MUL)
    return SDValue();

  // Both operands must be equivalent extend nodes.
  SDValue LeftOp = ShiftOperand.getOperand(0);
  SDValue RightOp = ShiftOperand.getOperand(1);

  bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
  bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;

  // Only sign- or zero-extended left operands are handled.
  if (!IsSignExt && !IsZeroExt)
    return SDValue();

  EVT NarrowVT = LeftOp.getOperand(0).getValueType();
  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();

  SDValue MulhRightOp;
  if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
    // A constant RHS is acceptable if it fits in the narrow type under the
    // same (signed/unsigned) interpretation as the extension.
    unsigned ActiveBits = IsSignExt
                              ? Constant->getAPIntValue().getMinSignedBits()
                              : Constant->getAPIntValue().getActiveBits();
    if (ActiveBits > NarrowVTSize)
      return SDValue();
    MulhRightOp = DAG.getConstant(
        Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
        NarrowVT);
  } else {
    if (LeftOp.getOpcode() != RightOp.getOpcode())
      return SDValue();
    // Check that the two extend nodes are the same type.
    if (NarrowVT != RightOp.getOperand(0).getValueType())
      return SDValue();
    MulhRightOp = RightOp.getOperand(0);
  }

  EVT WideVT = LeftOp.getValueType();
  // Proceed with the transformation if the wide types match.
  assert((WideVT == RightOp.getValueType()) &&
         "Cannot have a multiply node with two different operand types.");

  // Proceed with the transformation if the wide type is twice as large
  // as the narrow type.
  if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
    return SDValue();

  // Check the shift amount with the narrow type size.
  // Proceed with the transformation if the shift amount is the width
  // of the narrow type.
  unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
  if (ShiftAmt != NarrowVTSize)
    return SDValue();

  // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Othewise, zero extends (zext) use mulhu.
  unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;

  // Combine to mulh if mulh is legal/custom for the narrow type on the target.
  if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
    return SDValue();

  // Re-extend the narrow mulh result back to the original wide type,
  // matching the signedness of the original shift (SRA -> sext, SRL -> zext).
  SDValue Result =
      DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
  return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
                                     : DAG.getZExtOrTrunc(Result, DL, WideVT));
}
8851 
/// Combine step for ISD::SRA nodes. Applies constant folding, sign-bit
/// reasoning, shift merging, sext_inreg conversion, truncate-based rewrites,
/// SRA->SRL conversion when the sign bit is known zero, and the mulh/narrow
/// load transforms; returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);   // value being shifted
  SDValue N1 = N->getOperand(1);   // shift amount
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold (sra c1, c2) -> (sra c1, c2)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
    return C;

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  // NOTE(review): the declaration of N1C (presumably a constant-splat query
  // on N1) is elided in this excerpt.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // ExtVT is the type whose width equals the bits kept by the shl/sra pair.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
                               VT.getVectorElementCount());
    // NOTE(review): the continuation of this legality condition (and its
    // opening brace) is elided in this excerpt.
    if (!LegalOperations ||
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
    // Even if we can't convert to sext_inreg, we might be able to remove
    // this shift pair if the input is already sign extended.
    if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
      return N0.getOperand(0);
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    // Per-element: sum the two amounts in a widened APInt; clamp the sum to
    // OpSizeInBits - 1 (an sra beyond the width behaves like a max shift).
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      // Rebuild the amount operand in the same form (build_vector,
      // splat_vector, or scalar) as the original N1.
      SDValue ShiftValue;
      if (N1.getOpcode() == ISD::BUILD_VECTOR)
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
        assert(ShiftValues.size() == 1 &&
               "Expected matchBinaryPredicate to return one element for "
               "SPLAT_VECTORs");
        ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
      } else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constanst of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      // NOTE(review): the middle of this condition (legality checks on the
      // truncated type and sign_extend) is elided in this excerpt.
      if ((ShiftAmt > 0) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        // NOTE(review): the continuation of this getConstant call (the shift
        // amount type) is elided in this excerpt.
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
  // sra (add (shl X, N1C), AddC), N1C -->
  // sext (add (trunc X to (width - N1C)), AddC')
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
      N0.getOperand(0).getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
      SDValue Shl = N0.getOperand(0);
      // Determine what the truncate's type would be and ask the target if that
      // is a free operation.
      LLVMContext &Ctx = *DAG.getContext();
      unsigned ShiftAmt = N1C->getZExtValue();
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());

      // TODO: The simple type check probably belongs in the default hook
      // implementation and/or target-specific overrides (because
      // non-simple types likely require masking when legalized), but that
      // restriction may conflict with other transforms.
      if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
        // AddC' = AddC >> N1C, truncated to the narrow type.
        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
        return DAG.getSExtOrTrunc(Add, DL, VT);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  // if c1 is equal to the number of bits the trunc removes
  // TODO - support non-uniform vector shift amounts.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      EVT LargeVT = N0Op0.getValueType();
      unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
      if (LargeShift->getAPIntValue() == TruncBits) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
                                      getShiftAmountTy(LargeVT));
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N))
      return NewSRA;

  // Try to transform this shift into a multiply-high if
  // it matches the appropriate pattern detected in combineShiftToMULH.
  if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
    return MULH;

  // Attempt to convert a sra of a load into a narrower sign-extending load.
  if (SDValue NarrowLoad = reduceLoadWidth(N))
    return NarrowLoad;

  return SDValue();
}
9058 
9059 SDValue DAGCombiner::visitSRL(SDNode *N) {
9060  SDValue N0 = N->getOperand(0);
9061  SDValue N1 = N->getOperand(1);
9062  if (SDValue V = DAG.simplifyShift(N0, N1))
9063  return V;
9064 
9065  EVT VT = N0.getValueType();
9066  unsigned OpSizeInBits = VT.getScalarSizeInBits();
9067 
9068  // fold (srl c1, c2) -> c1 >>u c2
9069  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
9070  return C;
9071 
9072  // fold vector ops
9073  if (VT.isVector())
9074  if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9075  return FoldedVOp;
9076 
9077  if (SDValue NewSel = foldBinOpIntoSelect(N))
9078  return NewSel;
9079 
9080  // if (srl x, c) is known to be zero, return 0
9082  if (N1C &&
9083  DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9084  return DAG.getConstant(0, SDLoc(N), VT);
9085 
9086  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
9087  if (N0.getOpcode() == ISD::SRL) {
9088  auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9089  ConstantSDNode *RHS) {
9090  APInt c1 = LHS->getAPIntValue();
9091  APInt c2 = RHS->getAPIntValue();
9092  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9093  return (c1 + c2).uge(OpSizeInBits);
9094  };
9095  if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9096  return DAG.getConstant(0, SDLoc(N), VT);
9097 
9098  auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9099  ConstantSDNode *RHS) {
9100  APInt c1 = LHS->getAPIntValue();
9101  APInt c2 = RHS->getAPIntValue();
9102  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9103  return (c1 + c2).ult(OpSizeInBits);
9104  };
9105  if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9106  SDLoc DL(N);
9107  EVT ShiftVT = N1.getValueType();
9108  SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9109  return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9110  }
9111  }
9112 
9113  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9114  N0.getOperand(0).getOpcode() == ISD::SRL) {
9115  SDValue InnerShift = N0.getOperand(0);
9116  // TODO - support non-uniform vector shift amounts.
9117  if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9118  uint64_t c1 = N001C->getZExtValue();
9119  uint64_t c2 = N1C->getZExtValue();
9120  EVT InnerShiftVT = InnerShift.getValueType();
9121  EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9122  uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9123  // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
9124  // This is only valid if the OpSizeInBits + c1 = size of inner shift.
9125  if (c1 + OpSizeInBits == InnerShiftSize) {
9126  SDLoc DL(N);
9127  if (c1 + c2 >= InnerShiftSize)
9128  return DAG.getConstant(0, DL, VT);
9129  SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9130  SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9131  InnerShift.getOperand(0), NewShiftAmt);
9132  return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9133  }
9134  // In the more general case, we can clear the high bits after the shift:
9135  // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
9136  if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9137  c1 + c2 < InnerShiftSize) {
9138  SDLoc DL(N);
9139  SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9140  SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9141  InnerShift.getOperand(0), NewShiftAmt);
9142  SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
9143  OpSizeInBits - c2),
9144  DL, InnerShiftVT);
9145  SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
9146  return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9147  }
9148  }
9149  }
9150 
9151  // fold (srl (shl x, c), c) -> (and x, cst2)
9152  // TODO - (srl (shl x, c1), c2).
9153  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
9154  isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
9155  SDLoc DL(N);
9156  SDValue Mask =
9157  DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
9158  AddToWorklist(Mask.getNode());
9159  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
9160  }
9161 
9162  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
9163  // TODO - support non-uniform vector shift amounts.
9164  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9165  // Shifting in all undef bits?
9166  EVT SmallVT = N0.getOperand(0).getValueType();
9167  unsigned BitSize = SmallVT.getScalarSizeInBits();
9168  if (N1C->getAPIntValue().uge(BitSize))
9169  return DAG.getUNDEF(VT);
9170 
9171  if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9172  uint64_t ShiftAmt = N1C->getZExtValue();
9173  SDLoc DL0(N0);
9174  SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
9175  N0.getOperand(0),
9176  DAG.getConstant(ShiftAmt, DL0,
9177  getShiftAmountTy(SmallVT)));
9178  AddToWorklist(SmallShift.getNode());
9179  APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
9180  SDLoc DL(N);
9181  return DAG.getNode(ISD::AND, DL, VT,
9182  DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
9183  DAG.getConstant(Mask, DL, VT));
9184  }
9185  }
9186 
9187  // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
9188  // bit, which is unmodified by sra.
9189  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9190  if (N0.getOpcode() == ISD::SRA)
9191  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9192  }
9193 
9194  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
9195  if (N1C && N0.getOpcode() == ISD::CTLZ &&
9196  N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9197  KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9198 
9199  // If any of the input bits are KnownOne, then the input couldn't be all
9200  // zeros, thus the result of the srl will always be zero.
9201  if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9202 
9203  // If all of the bits input the to ctlz node are known to be zero, then
9204  // the result of the ctlz is "32" and the result of the shift is one.
9205  APInt UnknownBits = ~Known.Zero;
9206  if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9207 
9208  // Otherwise, check to see if there is exactly one bit input to the ctlz.
9209  if (UnknownBits.isPowerOf2()) {
9210  // Okay, we know that only that the single bit specified by UnknownBits
9211  // could be set on input to the CTLZ node. If this bit is set, the SRL
9212  // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
9213  // to an SRL/XOR pair, which is likely to simplify more.
9214  unsigned ShAmt = UnknownBits.countTrailingZeros();
9215  SDValue Op = N0.getOperand(0);
9216 
9217  if (ShAmt) {
9218  SDLoc DL(N0);
9219  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9220  DAG.getConstant(ShAmt, DL,
9221  getShiftAmountTy(Op.getValueType())));
9222  AddToWorklist(Op.getNode());
9223  }
9224 
9225  SDLoc DL(N);
9226  return DAG.getNode(ISD::XOR, DL, VT,
9227  Op, DAG.getConstant(1, DL, VT));
9228  }
9229  }
9230 
9231  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9232  if (N1.getOpcode() == ISD::TRUNCATE &&
9233  N1.getOperand(0).getOpcode() == ISD::AND) {
9234  if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9235  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
9236  }
9237 
9238  // fold operands of srl based on knowledge that the low bits are not
9239  // demanded.
9240  if (SimplifyDemandedBits(SDValue(N, 0)))
9241  return SDValue(N, 0);
9242 
9243  if (N1C && !N1C->isOpaque())
9244  if (SDValue NewSRL = visitShiftByConstant(N))
9245  return NewSRL;
9246 
9247  // Attempt to convert a srl of a load into a narrower zero-extending load.
9248  if (SDValue NarrowLoad = reduceLoadWidth(N))
9249  return NarrowLoad;
9250 
9251  // Here is a common situation. We want to optimize:
9252  //
9253  // %a = ...
9254  // %b = and i32 %a, 2
9255  // %c = srl i32 %b, 1
9256  // brcond i32 %c ...
9257  //
9258  // into
9259  //
9260  // %a = ...
9261  // %b = and %a, 2
9262  // %c = setcc eq %b, 0
9263  // brcond %c ...
9264  //
9265  // However when after the source operand of SRL is optimized into AND, the SRL
9266  // itself may not be optimized further. Look for it and add the BRCOND into
9267  // the worklist.
9268  if (N->hasOneUse()) {
9269  SDNode *Use = *N->use_begin();
9270  if (Use->getOpcode() == ISD::BRCOND)
9271  AddToWorklist(Use);
9272  else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
9273  // Also look pass the truncate.
9274  Use = *Use->use_begin();
9275  if (Use->getOpcode() == ISD::BRCOND)
9276  AddToWorklist(Use);
9277  }
9278  }
9279 
9280  // Try to transform this shift into a multiply-high if
9281  // it matches the appropriate pattern detected in combineShiftToMULH.
9282  if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9283  return MULH;
9284 
9285  return SDValue();
9286 }
9287 
9288 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9289  EVT VT = N->getValueType(0);
9290  SDValue N0 = N->getOperand(0);
9291  SDValue N1 = N->getOperand(1);
9292  SDValue N2 = N->getOperand(2);
9293  bool IsFSHL = N->getOpcode() == ISD::FSHL;
9294  unsigned BitWidth = VT.getScalarSizeInBits();
9295 
9296  // fold (fshl N0, N1, 0) -> N0
9297  // fold (fshr N0, N1, 0) -> N1
9298  if (isPowerOf2_32(BitWidth))
9299  if (DAG.MaskedValueIsZero(
9300  N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9301  return IsFSHL ? N0 : N1;
9302 
9303  auto IsUndefOrZero = [](SDValue V) {
9304  return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9305  };
9306 
9307  // TODO - support non-uniform vector shift amounts.
9308  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9309  EVT ShAmtTy = N2.getValueType();
9310 
9311  // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9312  if (Cst->getAPIntValue().uge(BitWidth)) {
9313  uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9314  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9315  DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9316  }
9317 
9318  unsigned ShAmt = Cst->getZExtValue();
9319  if (ShAmt == 0)
9320  return IsFSHL ? N0 : N1;
9321 
9322  // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9323  // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9324  // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9325  // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9326  if (IsUndefOrZero(N0))
9327  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9328  DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9329  SDLoc(N), ShAmtTy));
9330  if (IsUndefOrZero(N1))
9331  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9332  DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9333  SDLoc(N), ShAmtTy));
9334 
9335  // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9336  // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9337  // TODO - bigendian support once we have test coverage.
9338  // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
9339  // TODO - permit LHS EXTLOAD if extensions are shifted out.
9340  if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9341  !DAG.getDataLayout().isBigEndian()) {
9342  auto *LHS = dyn_cast<LoadSDNode>(N0);
9343  auto *RHS = dyn_cast<LoadSDNode>(N1);
9344  if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9345  LHS->getAddressSpace() == RHS->getAddressSpace() &&
9346  (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9348  if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9349  SDLoc DL(RHS);
9350  uint64_t PtrOff =
9351  IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9352  Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9353  bool Fast = false;
9354  if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9355  RHS->getAddressSpace(), NewAlign,
9356  RHS->getMemOperand()->getFlags(), &Fast) &&
9357  Fast) {
9358  SDValue NewPtr = DAG.getMemBasePlusOffset(
9359  RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9360  AddToWorklist(NewPtr.getNode());
9361  SDValue Load = DAG.getLoad(
9362  VT, DL, RHS->getChain(), NewPtr,
9363  RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9364  RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9365  // Replace the old load's chain with the new load's chain.
9366  WorklistRemover DeadNodes(*this);
9367  DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9368  return Load;
9369  }
9370  }
9371  }
9372  }
9373  }
9374 
9375  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9376  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9377  // iff We know the shift amount is in range.
9378  // TODO: when is it worth doing SUB(BW, N2) as well?
9379  if (isPowerOf2_32(BitWidth)) {
9380  APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9381  if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9382  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9383  if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9384  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9385  }
9386 
9387  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9388  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9389  // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
9390  // is legal as well we might be better off avoiding non-constant (BW - N2).
9391  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9392  if (N0 == N1 && hasOperation(RotOpc, VT))
9393  return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9394 
9395  // Simplify, based on bits shifted out of N0/N1.
9396  if (SimplifyDemandedBits(SDValue(N, 0)))
9397  return SDValue(N, 0);
9398 
9399  return SDValue();
9400 }
9401 
9402 SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
9403  SDValue N0 = N->getOperand(0);
9404  SDValue N1 = N->getOperand(1);
9405  if (SDValue V = DAG.simplifyShift(N0, N1))
9406  return V;
9407 
9408  EVT VT = N0.getValueType();
9409 
9410  // fold (*shlsat c1, c2) -> c1<<c2
9411  if (SDValue C =
9412  DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
9413  return C;
9414 
9415  return SDValue();
9416 }
9417 
9418 // Given a ABS node, detect the following pattern:
9419 // (ABS (SUB (EXTEND a), (EXTEND b))).
9420 // Generates UABD/SABD instruction.
9422  const TargetLowering &TLI) {
9423  SDValue AbsOp1 = N->getOperand(0);
9424  SDValue Op0, Op1;
9425 
9426  if (AbsOp1.getOpcode() != ISD::SUB)
9427  return SDValue();
9428 
9429  Op0 = AbsOp1.getOperand(0);
9430  Op1 = AbsOp1.getOperand(1);
9431 
9432  unsigned Opc0 = Op0.getOpcode();
9433  // Check if the operands of the sub are (zero|sign)-extended.
9434  if (Opc0 != Op1.getOpcode() ||
9435  (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9436  return SDValue();
9437 
9438  EVT VT1 = Op0.getOperand(0).getValueType();
9439  EVT VT2 = Op1.getOperand(0).getValueType();
9440  // Check if the operands are of same type and valid size.
9441  unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9442  if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9443  return SDValue();
9444 
9445  Op0 = Op0.getOperand(0);
9446  Op1 = Op1.getOperand(0);
9447  SDValue ABD =
9448  DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9449  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9450 }
9451 
9452 SDValue DAGCombiner::visitABS(SDNode *N) {
9453  SDValue N0 = N->getOperand(0);
9454  EVT VT = N->getValueType(0);
9455 
9456  // fold (abs c1) -> c2
9458  return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9459  // fold (abs (abs x)) -> (abs x)
9460  if (N0.getOpcode() == ISD::ABS)
9461  return N0;
9462  // fold (abs x) -> x iff not-negative
9463  if (DAG.SignBitIsZero(N0))
9464  return N0;
9465 
9466  if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9467  return ABD;
9468 
9469  return SDValue();
9470 }
9471 
9472 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9473  SDValue N0 = N->getOperand(0);
9474  EVT VT = N->getValueType(0);
9475 
9476  // fold (bswap c1) -> c2
9478  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9479  // fold (bswap (bswap x)) -> x
9480  if (N0.getOpcode() == ISD::BSWAP)
9481  return N0->getOperand(0);
9482 
9483  // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
9484  // isn't supported, it will be expanded to bswap followed by a manual reversal
9485  // of bits in each byte. By placing bswaps before bitreverse, we can remove
9486  // the two bswaps if the bitreverse gets expanded.
9487  if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
9488  SDLoc DL(N);
9489  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
9490  return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
9491  }
9492 
9493  return SDValue();
9494 }
9495 
9496 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9497  SDValue N0 = N->getOperand(0);
9498  EVT VT = N->getValueType(0);
9499 
9500  // fold (bitreverse c1) -> c2
9502  return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9503  // fold (bitreverse (bitreverse x)) -> x
9504  if (N0.getOpcode() == ISD::BITREVERSE)
9505  return N0.getOperand(0);
9506  return SDValue();
9507 }
9508 
9509 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9510  SDValue N0 = N->getOperand(0);
9511  EVT VT = N->getValueType(0);
9512 
9513  // fold (ctlz c1) -> c2
9515  return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9516 
9517  // If the value is known never to be zero, switch to the undef version.
9518  if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9519  if (DAG.isKnownNeverZero(N0))
9520  return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9521  }
9522 
9523  return SDValue();
9524 }
9525 
9526 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9527  SDValue N0 = N->getOperand(0);
9528  EVT VT = N->getValueType(0);
9529 
9530  // fold (ctlz_zero_undef c1) -> c2
9532  return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9533  return SDValue();
9534 }
9535 
9536 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9537  SDValue N0 = N->getOperand(0);
9538  EVT VT = N->getValueType(0);
9539 
9540  // fold (cttz c1) -> c2
9542  return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9543 
9544  // If the value is known never to be zero, switch to the undef version.
9545  if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9546  if (DAG.isKnownNeverZero(N0))
9547  return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9548  }
9549 
9550  return SDValue();
9551 }
9552 
9553 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9554  SDValue N0 = N->getOperand(0);
9555  EVT VT = N->getValueType(0);
9556 
9557  // fold (cttz_zero_undef c1) -> c2
9559  return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9560  return SDValue();
9561 }
9562 
9563 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9564  SDValue N0 = N->getOperand(0);
9565  EVT VT = N->getValueType(0);
9566 
9567  // fold (ctpop c1) -> c2
9569  return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9570  return SDValue();
9571 }
9572 
9573 // FIXME: This should be checking for no signed zeros on individual operands, as
9574 // well as no nans.
9576  SDValue RHS,
9577  const TargetLowering &TLI) {
9578  const TargetOptions &Options = DAG.getTarget().Options;
9579  EVT VT = LHS.getValueType();
9580 
9581  return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9583  DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9584 }
9585 
9586 /// Generate Min/Max node
9588  SDValue RHS, SDValue True, SDValue False,
9589  ISD::CondCode CC, const TargetLowering &TLI,
9590  SelectionDAG &DAG) {
9591  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9592  return SDValue();
9593 
9594  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9595  switch (CC) {
9596  case ISD::SETOLT:
9597  case ISD::SETOLE:
9598  case ISD::SETLT:
9599  case ISD::SETLE:
9600  case ISD::SETULT:
9601  case ISD::SETULE: {
9602  // Since it's known never nan to get here already, either fminnum or
9603  // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
9604  // expanded in terms of it.
9605  unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9606  if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9607  return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9608 
9609  unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9610  if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9611  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9612  return SDValue();
9613  }
9614  case ISD::SETOGT:
9615  case ISD::SETOGE:
9616  case ISD::SETGT:
9617  case ISD::SETGE:
9618  case ISD::SETUGT:
9619  case ISD::SETUGE: {
9620  unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9621  if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9622  return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9623 
9624  unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9625  if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9626  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9627  return SDValue();
9628  }
9629  default:
9630  return SDValue();
9631  }
9632 }
9633 
9634 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9635 /// the condition operand sign-bit across the value width and use it as a mask.
9637  SDValue Cond = N->getOperand(0);
9638  SDValue C1 = N->getOperand(1);
9639  SDValue C2 = N->getOperand(2);
9641  return SDValue();
9642 
9643  EVT VT = N->getValueType(0);
9644  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9645  VT != Cond.getOperand(0).getValueType())
9646  return SDValue();
9647 
9648  // The inverted-condition + commuted-select variants of these patterns are
9649  // canonicalized to these forms in IR.
9650  SDValue X = Cond.getOperand(0);
9651  SDValue CondC = Cond.getOperand(1);
9652  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9653  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9655  // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9656  SDLoc DL(N);
9657  SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9658  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9659  return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9660  }
9661  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9662  // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9663  SDLoc DL(N);
9664  SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9665  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9666  return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9667  }
9668  return SDValue();
9669 }
9670 
9671 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9672  SDValue Cond = N->getOperand(0);
9673  SDValue N1 = N->getOperand(1);
9674  SDValue N2 = N->getOperand(2);
9675  EVT VT = N->getValueType(0);
9676  EVT CondVT = Cond.getValueType();
9677  SDLoc DL(N);
9678 
9679  if (!VT.isInteger())
9680  return SDValue();
9681 
9682  auto *C1 = dyn_cast<ConstantSDNode>(N1);
9683  auto *C2 = dyn_cast<ConstantSDNode>(N2);
9684  if (!C1 || !C2)
9685  return SDValue();
9686 
9687  // Only do this before legalization to avoid conflicting with target-specific
9688  // transforms in the other direction (create a select from a zext/sext). There
9689  // is also a target-independent combine here in DAGCombiner in the other
9690  // direction for (select Cond, -1, 0) when the condition is not i1.
9691  if (CondVT == MVT::i1 && !LegalOperations) {
9692  if (C1->isZero() && C2->isOne()) {
9693  // select Cond, 0, 1 --> zext (!Cond)
9694  SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9695  if (VT != MVT::i1)
9696  NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9697  return NotCond;
9698  }
9699  if (C1->isZero() && C2->isAllOnes()) {
9700  // select Cond, 0, -1 --> sext (!Cond)
9701  SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9702  if (VT != MVT::i1)
9703  NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9704  return NotCond;
9705  }
9706  if (C1->isOne() && C2->isZero()) {
9707  // select Cond, 1, 0 --> zext (Cond)
9708  if (VT != MVT::i1)
9709  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9710  return Cond;
9711  }
9712  if (C1->isAllOnes() && C2->isZero()) {
9713  // select Cond, -1, 0 --> sext (Cond)
9714  if (VT != MVT::i1)
9715  Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9716  return Cond;
9717  }
9718 
9719  // Use a target hook because some targets may prefer to transform in the
9720  // other direction.
9721  if (TLI.convertSelectOfConstantsToMath(VT)) {
9722  // For any constants that differ by 1, we can transform the select into an
9723  // extend and add.
9724  const APInt &C1Val = C1->getAPIntValue();
9725  const APInt &C2Val = C2->getAPIntValue();
9726  if (C1Val - 1 == C2Val) {
9727  // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9728  if (VT != MVT::i1)
9729  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9730  return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9731  }
9732  if (C1Val + 1 == C2Val) {
9733  // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9734  if (VT != MVT::i1)
9735  Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9736  return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9737  }
9738 
9739  // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9740  if (C1Val.isPowerOf2() && C2Val.isZero()) {
9741  if (VT != MVT::i1)
9742  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9743  SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9744  return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9745  }
9746 
9747  if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9748  return V;
9749  }
9750 
9751  return SDValue();
9752  }
9753 
9754  // fold (select Cond, 0, 1) -> (xor Cond, 1)
9755  // We can't do this reliably if integer based booleans have different contents
9756  // to floating point based booleans. This is because we can't tell whether we
9757  // have an integer-based boolean or a floating-point-based boolean unless we
9758  // can find the SETCC that produced it and inspect its operands. This is
9759  // fairly easy if C is the SETCC node, but it can potentially be
9760  // undiscoverable (or not reasonably discoverable). For example, it could be
9761  // in another basic block or it could require searching a complicated
9762  // expression.
9763  if (CondVT.isInteger() &&
9764  TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9766  TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9768  C1->isZero() && C2->isOne()) {
9769  SDValue NotCond =
9770  DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9771  if (VT.bitsEq(CondVT))
9772  return NotCond;
9773  return DAG.getZExtOrTrunc(NotCond, DL, VT);
9774  }
9775 
9776  return SDValue();
9777 }
9778 
9780  assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9781  "Expected a (v)select");
9782  SDValue Cond = N->getOperand(0);
9783  SDValue T = N->getOperand(1), F = N->getOperand(2);
9784  EVT VT = N->getValueType(0);
9785  if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9786  return SDValue();
9787 
9788  // select Cond, Cond, F --> or Cond, F
9789  // select Cond, 1, F --> or Cond, F
9790  if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9791  return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9792 
9793  // select Cond, T, Cond --> and Cond, T
9794  // select Cond, T, 0 --> and Cond, T
9795  if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9796  return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9797 
9798  // select Cond, T, 1 --> or (not Cond), T
9799  if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9800  SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9801  return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9802  }
9803 
9804  // select Cond, 0, F --> and (not Cond), F
9805  if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9806  SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9807  return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9808  }
9809 
9810  return SDValue();
9811 }
9812 
9814  SDValue N0 = N->getOperand(0);
9815  SDValue N1 = N->getOperand(1);
9816  SDValue N2 = N->getOperand(2);
9817  EVT VT = N->getValueType(0);
9818  if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
9819  return SDValue();
9820 
9821  SDValue Cond0 = N0.getOperand(0);
9822  SDValue Cond1 = N0.getOperand(1);
9823  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9824  if (VT != Cond0.getValueType())
9825  return SDValue();
9826 
9827  // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
9828  // compare is inverted from that pattern ("Cond0 s> -1").
9829  if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
9830  ; // This is the pattern we are looking for.
9831  else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
9832  std::swap(N1, N2);
9833  else
9834  return SDValue();
9835 
9836  // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
9837  if (isNullOrNullSplat(N2)) {
9838  SDLoc DL(N);
9839  SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9840  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9841  return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
9842  }
9843 
9844  // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
9845  if (isAllOnesOrAllOnesSplat(N1)) {
9846  SDLoc DL(N);
9847  SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9848  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9849  return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
9850  }
9851 
9852  // If we have to invert the sign bit mask, only do that transform if the
9853  // target has a bitwise 'and not' instruction (the invert is free).
9854  // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
9855  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9856  if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
9857  SDLoc DL(N);
9858  SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9859  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9860  SDValue Not = DAG.getNOT(DL, Sra, VT);
9861  return DAG.getNode(ISD::AND, DL, VT, Not, N2);
9862  }
9863 
9864  // TODO: There's another pattern in this family, but it may require
9865  // implementing hasOrNot() to check for profitability:
9866  // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
9867 
9868  return SDValue();
9869 }
9870 
9871 SDValue DAGCombiner::visitSELECT(SDNode *N) {
9872  SDValue N0 = N->getOperand(0);
9873  SDValue N1 = N->getOperand(1);
9874  SDValue N2 = N->getOperand(2);
9875  EVT VT = N->getValueType(0);
9876  EVT VT0 = N0.getValueType();
9877  SDLoc DL(N);
9878  SDNodeFlags Flags = N->getFlags();
9879 
9880  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9881  return V;
9882 
9883  if (SDValue V = foldSelectOfConstants(N))
9884  return V;
9885 
9886  if (SDValue V = foldBoolSelectToLogic(N, DAG))
9887  return V;
9888 
9889  // If we can fold this based on the true/false value, do so.
9890  if (SimplifySelectOps(N, N1, N2))
9891  return SDValue(N, 0); // Don't revisit N.
9892 
9893  if (VT0 == MVT::i1) {
9894  // The code in this block deals with the following 2 equivalences:
9895  // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9896  // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9897  // The target can specify its preferred form with the
9898  // shouldNormalizeToSelectSequence() callback. However we always transform
9899  // to the right anyway if we find the inner select exists in the DAG anyway
9900  // and we always transform to the left side if we know that we can further
9901  // optimize the combination of the conditions.
9902  bool normalizeToSequence =
9904  // select (and Cond0, Cond1), X, Y
9905  // -> select Cond0, (select Cond1, X, Y), Y
9906  if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9907  SDValue Cond0 = N0->getOperand(0);
9908  SDValue Cond1 = N0->getOperand(1);
9909  SDValue InnerSelect =
9910  DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9911  if (normalizeToSequence || !InnerSelect.use_empty())
9912  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9913  InnerSelect, N2, Flags);
9914  // Cleanup on failure.
9915  if (InnerSelect.use_empty())
9916  recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9917  }
9918  // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9919  if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9920  SDValue Cond0 = N0->getOperand(0);
9921  SDValue Cond1 = N0->getOperand(1);
9922  SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9923  Cond1, N1, N2, Flags);
9924  if (normalizeToSequence || !InnerSelect.use_empty())
9925  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9926  InnerSelect, Flags);
9927  // Cleanup on failure.
9928  if (InnerSelect.use_empty())
9929  recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9930  }
9931 
9932  // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9933  if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9934  SDValue N1_0 = N1->getOperand(0);
9935  SDValue N1_1 = N1->getOperand(1);
9936  SDValue N1_2 = N1->getOperand(2);
9937  if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9938  // Create the actual and node if we can generate good code for it.
9939  if (!normalizeToSequence) {
9940  SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9941  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9942  N2, Flags);
9943  }
9944  // Otherwise see if we can optimize the "and" to a better pattern.
9945  if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9946  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9947  N2, Flags);
9948  }
9949  }
9950  }
9951  // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9952  if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9953  SDValue N2_0 = N2->getOperand(0);
9954  SDValue N2_1 = N2->getOperand(1);
9955  SDValue N2_2 = N2->getOperand(2);
9956  if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9957  // Create the actual or node if we can generate good code for it.
9958  if (!normalizeToSequence) {
9959  SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9960  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9961  N2_2, Flags);
9962  }
9963  // Otherwise see if we can optimize to a better pattern.
9964  if (SDValue Combined = visitORLike(N0, N2_0, N))
9965  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9966  N2_2, Flags);
9967  }
9968  }
9969  }
9970 
9971  // select (not Cond), N1, N2 -> select Cond, N2, N1
9972  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9973  SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9974  SelectOp->setFlags(Flags);
9975  return SelectOp;
9976  }
9977 
9978  // Fold selects based on a setcc into other things, such as min/max/abs.
9979  if (N0.getOpcode() == ISD::SETCC) {
9980  SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9981  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9982 
9983  // select (fcmp lt x, y), x, y -> fminnum x, y
9984  // select (fcmp gt x, y), x, y -> fmaxnum x, y
9985  //
9986  // This is OK if we don't care what happens if either operand is a NaN.
9987  if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9988  if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9989  CC, TLI, DAG))
9990  return FMinMax;
9991 
9992  // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9993  // This is conservatively limited to pre-legal-operations to give targets
9994  // a chance to reverse the transform if they want to do that. Also, it is
9995  // unlikely that the pattern would be formed late, so it's probably not
9996  // worth going through the other checks.
9997  if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9998  CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9999  N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
10000  auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
10001  auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
10002  if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
10003  // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
10004  // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
10005  //
10006  // The IR equivalent of this transform would have this form:
10007  // %a = add %x, C
10008  // %c = icmp ugt %x, ~C
10009  // %r = select %c, -1, %a
10010  // =>
10011  // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
10012  // %u0 = extractvalue %u, 0
10013  // %u1 = extractvalue %u, 1
10014  // %r = select %u1, -1, %u0
10015  SDVTList VTs = DAG.getVTList(VT, VT0);
10016  SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
10017  return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
10018  }
10019  }
10020 
10021  if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
10022  (!LegalOperations &&
10024  // Any flags available in a select/setcc fold will be on the setcc as they
10025  // migrated from fcmp
10026  Flags = N0.getNode()->getFlags();
10027  SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
10028  N2, N0.getOperand(2));
10029  SelectNode->setFlags(Flags);
10030  return SelectNode;
10031  }
10032 
10033  if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
10034  return NewSel;
10035  }
10036 
10037  if (!VT.isVector())
10038  if (SDValue BinOp = foldSelectOfBinops(N))
10039  return BinOp;
10040 
10041  return SDValue();
10042 }
10043 
10044 // This function assumes all the vselect's arguments are CONCAT_VECTOR
10045 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
// NOTE(review): line 10046 (the function signature — upstream this is
// static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG))
// is missing from this excerpt; confirm against the original file.
10047  SDLoc DL(N);
10048  SDValue Cond = N->getOperand(0);
10049  SDValue LHS = N->getOperand(1);
10050  SDValue RHS = N->getOperand(2);
10051  EVT VT = N->getValueType(0);
10052  int NumElems = VT.getVectorNumElements();
10053  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
10054  RHS.getOpcode() == ISD::CONCAT_VECTORS &&
10055  Cond.getOpcode() == ISD::BUILD_VECTOR);
10056 
10057  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
10058  // binary ones here.
10059  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
10060  return SDValue();
10061 
10062  // We're sure we have an even number of elements due to the
10063  // concat_vectors we have as arguments to vselect.
10064  // Skip BV elements until we find one that's not an UNDEF
10065  // After we find an UNDEF element, keep looping until we get to half the
10066  // length of the BV and see if all the non-undef nodes are the same.
10067  ConstantSDNode *BottomHalf = nullptr;
10068  for (int i = 0; i < NumElems / 2; ++i) {
10069  if (Cond->getOperand(i)->isUndef())
10070  continue;
10071 
// Remember the first non-undef selector; any later mismatch means the
// bottom half is not uniform, so the fold does not apply.
10072  if (BottomHalf == nullptr)
10073  BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10074  else if (Cond->getOperand(i).getNode() != BottomHalf)
10075  return SDValue();
10076  }
10077 
10078  // Do the same for the second half of the BuildVector
10079  ConstantSDNode *TopHalf = nullptr;
10080  for (int i = NumElems / 2; i < NumElems; ++i) {
10081  if (Cond->getOperand(i)->isUndef())
10082  continue;
10083 
10084  if (TopHalf == nullptr)
10085  TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10086  else if (Cond->getOperand(i).getNode() != TopHalf)
10087  return SDValue();
10088  }
10089 
10090  assert(TopHalf && BottomHalf &&
10091  "One half of the selector was all UNDEFs and the other was all the "
10092  "same value. This should have been addressed before this function.");
// Each half of the selector is now a single boolean: pick the matching
// concat operand for that half (zero selects from RHS, nonzero from LHS).
10093  return DAG.getNode(
10094  ISD::CONCAT_VECTORS, DL, VT,
10095  BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
10096  TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
10097 }
10098 
// refineUniformBase: if a gather/scatter currently uses a null base pointer
// and an index of the form (splat(X) + Vec), fold the splatted scalar X into
// BasePtr and keep Vec as the per-lane Index. Returns true when it rewrote
// BasePtr/Index in place (callers rebuild the memory node in that case).
// NOTE(review): line 10099 (the signature — upstream this is
// bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG))
// is missing from this excerpt; confirm against the original file.
10100  if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
10101  return false;
10102 
10103  // For now we check only the LHS of the add.
10104  SDValue LHS = Index.getOperand(0);
10105  SDValue SplatVal = DAG.getSplatValue(LHS);
10106  if (!SplatVal)
10107  return false;
10108 
// Rewrite the addressing in place: the base becomes the splatted scalar and
// the index becomes the remaining vector addend.
10109  BasePtr = SplatVal;
10110  Index = Index.getOperand(1);
10111  return true;
10112 }
10113 
10114 // Fold sext/zext of index into index type.
// NOTE(review): this excerpt is missing line 10115 (the start of the
// signature — upstream: static bool refineIndexType(MaskedGatherScatterSDNode
// *MGS, SDValue &Index, bool Scaled, SelectionDAG &DAG)) and lines 10121 and
// 10130 (upstream these update the node's index type via MGS->setIndexType
// before querying the target); confirm against the original file.
10116  bool Scaled, SelectionDAG &DAG) {
10117  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10118 
// A zero-extended index can be narrowed to the pre-extension type when the
// target reports the extend is redundant for gather/scatter addressing.
10119  if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10120  SDValue Op = Index.getOperand(0);
10122  if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10123  Index = Op;
10124  return true;
10125  }
10126  }
10127 
// Likewise for a sign-extended index.
10128  if (Index.getOpcode() == ISD::SIGN_EXTEND) {
10129  SDValue Op = Index.getOperand(0);
10131  if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10132  Index = Op;
10133  return true;
10134  }
10135  }
10136 
10137  return false;
10138 }
10139 
// Combine a masked scatter: a scatter whose mask is known all-zero stores
// nothing and folds to its chain; otherwise try to refine the uniform base
// and strip redundant index extensions.
10140 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10141  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
10142  SDValue Mask = MSC->getMask();
10143  SDValue Chain = MSC->getChain();
10144  SDValue Index = MSC->getIndex();
10145  SDValue Scale = MSC->getScale();
10146  SDValue StoreVal = MSC->getValue();
10147  SDValue BasePtr = MSC->getBasePtr();
10148  SDLoc DL(N);
10149 
10150  // Zap scatters with a zero mask.
// NOTE(review): line 10151 (the guard — upstream:
// if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))) is missing from
// this excerpt; confirm against the original file.
10152  return Chain;
10153 
// Fold a splatted scalar addend of the index into the (null) base pointer.
10154  if (refineUniformBase(BasePtr, Index, DAG)) {
10155  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10156  return DAG.getMaskedScatter(
10157  DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10158  MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10159  }
10160 
// Strip a redundant sign/zero extension from the index.
10161  if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
10162  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10163  return DAG.getMaskedScatter(
10164  DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10165  MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10166  }
10167 
10168  return SDValue();
10169 }
10170 
// Combine a masked store: drop it for an all-zero mask, demote it to a plain
// store for an all-ones mask, try indexed-store formation, shrink a
// truncating store's value via demanded bits, and fold trunc+masked-store
// into a masked truncating store.
10171 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10172  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
10173  SDValue Mask = MST->getMask();
10174  SDValue Chain = MST->getChain();
10175  SDValue Value = MST->getValue();
10176  SDValue Ptr = MST->getBasePtr();
10177  SDLoc DL(N);
10178 
10179  // Zap masked stores with a zero mask.
// NOTE(review): line 10180 (the guard — upstream:
// if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))) is missing from
// this excerpt; confirm against the original file.
10181  return Chain;
10182 
10183  // If this is a masked load with an all ones mask, we can use a unmasked load.
10184  // FIXME: Can we do this for indexed, compressing, or truncating stores?
10185  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10186  !MST->isCompressingStore() && !MST->isTruncatingStore())
10187  return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10188  MST->getBasePtr(), MST->getPointerInfo(),
// NOTE(review): line 10189 (upstream passes the original alignment and
// mem-operand flags here) is missing from this excerpt.
10190  MST->getAAInfo());
10191 
10192  // Try transforming N to an indexed store.
10193  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10194  return SDValue(N, 0);
10195 
// For an unindexed truncating store of a non-opaque integer, only the low
// bits that reach memory are demanded — let SimplifyDemandedBits shrink the
// stored value accordingly.
10196  if (MST->isTruncatingStore() && MST->isUnindexed() &&
10197  Value.getValueType().isInteger() &&
10198  (!isa<ConstantSDNode>(Value) ||
10199  !cast<ConstantSDNode>(Value)->isOpaque())) {
10200  APInt TruncDemandedBits =
10201  APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10202  MST->getMemoryVT().getScalarSizeInBits());
10203 
10204  // See if we can simplify the operation with
10205  // SimplifyDemandedBits, which only works if the value has a single use.
10206  if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
10207  // Re-visit the store if anything changed and the store hasn't been merged
10208  // with another node (N is deleted) SimplifyDemandedBits will add Value's
10209  // node back to the worklist if necessary, but we also need to re-visit
10210  // the Store node itself.
10211  if (N->getOpcode() != ISD::DELETED_NODE)
10212  AddToWorklist(N);
10213  return SDValue(N, 0);
10214  }
10215  }
10216 
10217  // If this is a TRUNC followed by a masked store, fold this into a masked
10218  // truncating store. We can do this even if this is already a masked
10219  // truncstore.
10220  if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
10221  MST->isUnindexed() &&
10222  TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10223  MST->getMemoryVT(), LegalOperations)) {
// The mask's boolean representation is promoted to match the wider
// (pre-truncate) value type.
10224  auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10225  Value.getOperand(0).getValueType());
10226  return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10227  MST->getOffset(), Mask, MST->getMemoryVT(),
10228  MST->getMemOperand(), MST->getAddressingMode(),
10229  /*IsTruncating=*/true);
10230  }
10231 
10232  return SDValue();
10233 }
10234 
// Combine a masked gather: an all-zero mask loads nothing, so the node folds
// to its pass-through value; otherwise try to refine the uniform base and
// strip redundant index extensions.
10235 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10236  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
10237  SDValue Mask = MGT->getMask();
10238  SDValue Chain = MGT->getChain();
10239  SDValue Index = MGT->getIndex();
10240  SDValue Scale = MGT->getScale();
10241  SDValue PassThru = MGT->getPassThru();
10242  SDValue BasePtr = MGT->getBasePtr();
10243  SDLoc DL(N);
10244 
10245  // Zap gathers with a zero mask.
// NOTE(review): line 10246 (the guard — upstream:
// if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))) is missing from
// this excerpt; confirm against the original file.
10247  return CombineTo(N, PassThru, MGT->getChain());
10248 
// Fold a splatted scalar addend of the index into the (null) base pointer.
10249  if (refineUniformBase(BasePtr, Index, DAG)) {
10250  SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10251  return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10252  MGT->getMemoryVT(), DL, Ops,
10253  MGT->getMemOperand(), MGT->getIndexType(),
10254  MGT->getExtensionType());
10255  }
10256 
// Strip a redundant sign/zero extension from the index.
10257  if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
10258  SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10259  return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10260  MGT->getMemoryVT(), DL, Ops,
10261  MGT->getMemOperand(), MGT->getIndexType(),
10262  MGT->getExtensionType());
10263  }
10264 
10265  return SDValue();
10266 }
10267 
// Combine a masked load: an all-zero mask yields the pass-through value, an
// all-ones mask demotes to a plain load, and otherwise we try indexed-load
// formation.
10268 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10269  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
10270  SDValue Mask = MLD->getMask();
10271  SDLoc DL(N);
10272 
10273  // Zap masked loads with a zero mask.
// NOTE(review): line 10274 (the guard — upstream:
// if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))) is missing from
// this excerpt; confirm against the original file.
10275  return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10276 
10277  // If this is a masked load with an all ones mask, we can use a unmasked load.
10278  // FIXME: Can we do this for indexed, expanding, or extending loads?
10279  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10280  !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10281  SDValue NewLd = DAG.getLoad(
10282  N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10283  MLD->getPointerInfo(), MLD->getOriginalAlign(),
// NOTE(review): line 10284 (upstream passes the mem-operand flags and AA
// info and closes the call here) is missing from this excerpt.
10285  return CombineTo(N, NewLd, NewLd.getValue(1));
10286  }
10287 
10288  // Try transforming N to an indexed load.
10289  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10290  return SDValue(N, 0);
10291 
10292  return SDValue();
10293 }
10294 
10295 /// A vector select of 2 constant vectors can be simplified to math/logic to
10296 /// avoid a variable select instruction and possibly avoid constant loads.
10297 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10298  SDValue Cond = N->getOperand(0);
10299  SDValue N1 = N->getOperand(1);
10300  SDValue N2 = N->getOperand(2);
10301  EVT VT = N->getValueType(0);
10302  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10303  !TLI.convertSelectOfConstantsToMath(VT) ||
// NOTE(review): lines 10304-10305 (the remainder of this guard — upstream
// they require both N1 and N2 to be build_vectors of ConstantSDNodes) are
// missing from this excerpt; confirm against the original file.
10306  return SDValue();
10307 
10308  // Check if we can use the condition value to increment/decrement a single
10309  // constant value. This simplifies a select to an add and removes a constant
10310  // load/materialization from the general case.
10311  bool AllAddOne = true;
10312  bool AllSubOne = true;
10313  unsigned Elts = VT.getVectorNumElements();
10314  for (unsigned i = 0; i != Elts; ++i) {
10315  SDValue N1Elt = N1.getOperand(i);
10316  SDValue N2Elt = N2.getOperand(i);
10317  if (N1Elt.isUndef() || N2Elt.isUndef())
10318  continue;
10319  if (N1Elt.getValueType() != N2Elt.getValueType())
10320  continue;
10321 
// Per element, check whether the true constant is exactly one more/less
// than the false constant; both flags must survive every lane.
10322  const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10323  const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10324  if (C1 != C2 + 1)
10325  AllAddOne = false;
10326  if (C1 != C2 - 1)
10327  AllSubOne = false;
10328  }
10329 
10330  // Further simplifications for the extra-special cases where the constants are
10331  // all 0 or all -1 should be implemented as folds of these patterns.
10332  SDLoc DL(N);
10333  if (AllAddOne || AllSubOne) {
10334  // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10335  // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
10336  auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10337  SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10338  return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
10339  }
10340 
10341  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
10342  APInt Pow2C;
10343  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10344  isNullOrNullSplat(N2)) {
10345  SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
10346  SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10347  return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10348  }
10349 
10350  if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10351  return V;
10352 
10353  // The general case for select-of-constants:
10354  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10355  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10356  // leave that to a machine-specific pass.
10357  return SDValue();
10358 }
10359 
// Combine a VSELECT node: simplification, boolean-flip canonicalization,
// integer-abs recognition, min/max formation, compare widening, saturating
// add/sub matching, constant-condition folds, and select-of-constants math.
// NOTE(review): several hyperlinked lines were dropped from this excerpt
// (flagged inline below); confirm each against the original file.
10360 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10361  SDValue N0 = N->getOperand(0);
10362  SDValue N1 = N->getOperand(1);
10363  SDValue N2 = N->getOperand(2);
10364  EVT VT = N->getValueType(0);
10365  SDLoc DL(N);
10366 
10367  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10368  return V;
10369 
10370  if (SDValue V = foldBoolSelectToLogic(N, DAG))
10371  return V;
10372 
10373  // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10374  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10375  return DAG.getSelect(DL, VT, F, N2, N1);
10376 
10377  // Canonicalize integer abs.
10378  // vselect (setg[te] X, 0), X, -X ->
10379  // vselect (setgt X, -1), X, -X ->
10380  // vselect (setl[te] X, 0), -X, X ->
10381  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10382  if (N0.getOpcode() == ISD::SETCC) {
10383  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10384  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10385  bool isAbs = false;
10386  bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
10387 
10388  if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10389  (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10390  N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
// NOTE(review): line 10391 is missing here (upstream:
// isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());).
10392  else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10393  N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
// NOTE(review): line 10394 is missing here (upstream:
// isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());).
10395 
10396  if (isAbs) {
10397  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
10398  return DAG.getNode(ISD::ABS, DL, VT, LHS);
10399 
10400  SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10401  DAG.getConstant(VT.getScalarSizeInBits() - 1,
10402  DL, getShiftAmountTy(VT)));
10403  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10404  AddToWorklist(Shift.getNode());
10405  AddToWorklist(Add.getNode());
10406  return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10407  }
10408 
10409  // vselect x, y (fcmp lt x, y) -> fminnum x, y
10410  // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
10411  //
10412  // This is OK if we don't care about what happens if either operand is a
10413  // NaN.
10414  //
10415  if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10416  if (SDValue FMinMax =
10417  combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10418  return FMinMax;
10419  }
10420 
10421  if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10422  return S;
10423  if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10424  return S;
10425 
10426  // If this select has a condition (setcc) with narrower operands than the
10427  // select, try to widen the compare to match the select width.
10428  // TODO: This should be extended to handle any constant.
10429  // TODO: This could be extended to handle non-loading patterns, but that
10430  // requires thorough testing to avoid regressions.
10431  if (isNullOrNullSplat(RHS)) {
10432  EVT NarrowVT = LHS.getValueType();
// NOTE(review): line 10433 is missing here (upstream it defines WideVT, the
// select's integer value type used below).
10434  EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10435  unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10436  unsigned WideWidth = WideVT.getScalarSizeInBits();
10437  bool IsSigned = isSignedIntSetCC(CC);
10438  auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10439  if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10440  SetCCWidth != 1 && SetCCWidth < WideWidth &&
10441  TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10442  TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10443  // Both compare operands can be widened for free. The LHS can use an
10444  // extended load, and the RHS is a constant:
10445  // vselect (ext (setcc load(X), C)), N1, N2 -->
10446  // vselect (setcc extload(X), C'), N1, N2
10447  auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10448  SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10449  SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10450  EVT WideSetCCVT = getSetCCResultType(WideVT);
10451  SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10452  return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10453  }
10454  }
10455 
10456  // Match VSELECTs into add with unsigned saturation.
10457  if (hasOperation(ISD::UADDSAT, VT)) {
10458  // Check if one of the arms of the VSELECT is vector with all bits set.
10459  // If it's on the left side invert the predicate to simplify logic below.
10460  SDValue Other;
10461  ISD::CondCode SatCC = CC;
// NOTE(review): line 10462 is missing here (upstream:
// if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {).
10463  Other = N2;
10464  SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10465  } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10466  Other = N1;
10467  }
10468 
10469  if (Other && Other.getOpcode() == ISD::ADD) {
10470  SDValue CondLHS = LHS, CondRHS = RHS;
10471  SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10472 
10473  // Canonicalize condition operands.
10474  if (SatCC == ISD::SETUGE) {
10475  std::swap(CondLHS, CondRHS);
10476  SatCC = ISD::SETULE;
10477  }
10478 
10479  // We can test against either of the addition operands.
10480  // x <= x+y ? x+y : ~0 --> uaddsat x, y
10481  // x+y >= x ? x+y : ~0 --> uaddsat x, y
10482  if (SatCC == ISD::SETULE && Other == CondRHS &&
10483  (OpLHS == CondLHS || OpRHS == CondLHS))
10484  return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10485 
10486  if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10487  (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10488  OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10489  CondLHS == OpLHS) {
10490  // If the RHS is a constant we have to reverse the const
10491  // canonicalization.
10492  // x >= ~C ? x+C : ~0 --> uaddsat x, C
10493  auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10494  return Cond->getAPIntValue() == ~Op->getAPIntValue();
10495  };
10496  if (SatCC == ISD::SETULE &&
10497  ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10498  return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10499  }
10500  }
10501  }
10502 
10503  // Match VSELECTs into sub with unsigned saturation.
10504  if (hasOperation(ISD::USUBSAT, VT)) {
10505  // Check if one of the arms of the VSELECT is a zero vector. If it's on
10506  // the left side invert the predicate to simplify logic below.
10507  SDValue Other;
10508  ISD::CondCode SatCC = CC;
// NOTE(review): line 10509 is missing here (upstream:
// if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {).
10510  Other = N2;
10511  SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10512  } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10513  Other = N1;
10514  }
10515 
10516  if (Other && Other.getNumOperands() == 2) {
10517  SDValue CondRHS = RHS;
10518  SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10519 
10520  if (Other.getOpcode() == ISD::SUB &&
10521  LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10522  OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10523  // Look for a general sub with unsigned saturation first.
10524  // zext(x) >= y ? x - trunc(y) : 0
10525  // --> usubsat(x,trunc(umin(y,SatLimit)))
10526  // zext(x) > y ? x - trunc(y) : 0
10527  // --> usubsat(x,trunc(umin(y,SatLimit)))
10528  if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10529  return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10530  DL);
10531  }
10532 
10533  if (OpLHS == LHS) {
10534  // Look for a general sub with unsigned saturation first.
10535  // x >= y ? x-y : 0 --> usubsat x, y
10536  // x > y ? x-y : 0 --> usubsat x, y
10537  if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10538  Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10539  return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10540 
10541  if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10542  OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10543  if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10544  CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10545  // If the RHS is a constant we have to reverse the const
10546  // canonicalization.
10547  // x > C-1 ? x+-C : 0 --> usubsat x, C
10548  auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10549  return (!Op && !Cond) ||
10550  (Op && Cond &&
10551  Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10552  };
10553  if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10554  ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10555  /*AllowUndefs*/ true)) {
10556  OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10557  DAG.getConstant(0, DL, VT), OpRHS);
10558  return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10559  }
10560 
10561  // Another special case: If C was a sign bit, the sub has been
10562  // canonicalized into a xor.
10563  // FIXME: Would it be better to use computeKnownBits to determine
10564  // whether it's safe to decanonicalize the xor?
10565  // x s< 0 ? x^C : 0 --> usubsat x, C
10566  APInt SplatValue;
10567  if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10568  ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
// NOTE(review): line 10569 is missing here (upstream this condition
// continues, allowing undefs in the splat match).
10570  SplatValue.isSignMask()) {
10571  // Note that we have to rebuild the RHS constant here to
10572  // ensure we don't rely on particular values of undef lanes.
10573  OpRHS = DAG.getConstant(SplatValue, DL, VT);
10574  return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10575  }
10576  }
10577  }
10578  }
10579  }
10580  }
10581  }
10582 
10583  if (SimplifySelectOps(N, N1, N2))
10584  return SDValue(N, 0); // Don't revisit N.
10585 
10586  // Fold (vselect all_ones, N1, N2) -> N1
// NOTE(review): line 10587 is missing here (upstream:
// if (ISD::isBuildVectorAllOnes(N0.getNode()))).
10588  return N1;
10589  // Fold (vselect all_zeros, N1, N2) -> N2
// NOTE(review): line 10590 is missing here (upstream:
// if (ISD::isBuildVectorAllZeros(N0.getNode()))).
10591  return N2;
10592 
10593  // The ConvertSelectToConcatVector function is assuming both the above
10594  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
10595  // and addressed.
10596  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10597  N2.getOpcode() == ISD::CONCAT_VECTORS &&
// NOTE(review): line 10598 is missing here (upstream it additionally
// requires N0 to be a build_vector of ConstantSDNodes).
10599  if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10600  return CV;
10601  }
10602 
10603  if (SDValue V = foldVSelectOfConstants(N))
10604  return V;
10605 
10606  if (hasOperation(ISD::SRA, VT))
10607  if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
10608  return V;
10609 
10610  return SDValue();
10611 }
10612 
// Combine a SELECT_CC node: fold identical true/false values, resolve a
// constant/undef condition, simplify the embedded compare, and otherwise
// defer to SimplifySelectCC for min/max/abs-style folds.
10613 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10614  SDValue N0 = N->getOperand(0);
10615  SDValue N1 = N->getOperand(1);
10616  SDValue N2 = N->getOperand(2);
10617  SDValue N3 = N->getOperand(3);
10618  SDValue N4 = N->getOperand(4);
10619  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10620 
10621  // fold select_cc lhs, rhs, x, x, cc -> x
10622  if (N2 == N3)
10623  return N2;
10624 
10625  // Determine if the condition we're dealing with is constant
// NOTE(review): line 10626 is missing here (upstream it opens the call whose
// trailing arguments appear on the next line:
// if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
//                                 N0, N1, ...)); confirm against the original.
10627  CC, SDLoc(N), false)) {
10628  AddToWorklist(SCC.getNode());
10629 
10630  if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10631  if (!SCCC->isZero())
10632  return N2; // cond always true -> true val
10633  else
10634  return N3; // cond always false -> false val
10635  } else if (SCC->isUndef()) {
10636  // When the condition is UNDEF, just return the first operand. This is
10637  // coherent the DAG creation, no setcc node is created in this case
10638  return N2;
10639  } else if (SCC.getOpcode() == ISD::SETCC) {
10640  // Fold to a simpler select_cc
10641  SDValue SelectOp = DAG.getNode(
10642  ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10643  SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10644  SelectOp->setFlags(SCC->getFlags());
10645  return SelectOp;
10646  }
10647  }
10648 
10649  // If we can fold this based on the true/false value, do so.
10650  if (SimplifySelectOps(N, N2, N3))
10651  return SDValue(N, 0); // Don't revisit N.
10652 
10653  // fold select_cc into other things, such as min/max/abs
10654  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10655 }
10656 
10657 SDValue DAGCombiner::visitSETCC(SDNode *N) {
10658  // setcc is very commonly used as an argument to brcond. This pattern
10659  // also lend itself to numerous combines and, as a result, it is desired
10660  // we keep the argument to a brcond as a setcc as much as possible.
10661  bool PreferSetCC =
10662  N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10663 
10664  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10665  EVT VT = N->getValueType(0);
10666 
10667  // SETCC(FREEZE(X), CONST, Cond)
10668  // =>
10669  // FREEZE(SETCC(X, CONST, Cond))
10670  // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10671  // isn't equivalent to true or false.
10672  // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10673  // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10674  //
10675  // This transformation is beneficial because visitBRCOND can fold
10676  // BRCOND(FREEZE(X)) to BRCOND(X).
10677 
10678  // Conservatively optimize integer comparisons only.
10679  if (PreferSetCC) {
10680  // Do this only when SETCC is going to be used by BRCOND.
10681 
10682  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10683  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10684  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10685  bool Updated = false;
10686 
10687  // Is 'X Cond C' always true or false?
10688  auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10689  bool False = (Cond == ISD::SETULT && C->isZero()) ||
10690  (Cond == ISD::SETLT && C->isMinSignedValue()) ||
10691  (Cond == ISD::SETUGT && C->isAllOnes()) ||
10692  (Cond == ISD::SETGT && C->isMaxSignedValue());
10693  bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
10694  (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
10695  (Cond == ISD::SETUGE && C->isZero()) ||
10696  (Cond == ISD::SETGE && C->isMinSignedValue());
10697  return True || False;
10698  };
10699 
10700  if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10701  if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10702  N0 = N0->getOperand(0);
10703  Updated = true;
10704  }
10705  }
10706  if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10707  if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10708  N0C)) {
10709  N1 = N1->getOperand(0);
10710  Updated = true;
10711  }
10712  }
10713 
10714  if (Updated)
10715  return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10716  }
10717 
10718  SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10719  SDLoc(N), !PreferSetCC);
10720 
10721  if (!Combined)
10722  return SDValue();
10723 
10724  // If we prefer to have a setcc, and we don't, we'll try our best to
10725  // recreate one using rebuildSetCC.
10726  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10727  SDValue NewSetCC = rebuildSetCC(Combined);
10728 
10729  // We don't have anything interesting to combine to.
10730  if (NewSetCC.getNode() == N)
10731  return SDValue();
10732 
10733  if (NewSetCC)
10734  return NewSetCC;
10735  }
10736 
10737  return Combined;
10738 }
10739 
10740 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10741  SDValue LHS = N->getOperand(0);
10742  SDValue RHS = N->getOperand(1);
10743  SDValue Carry = N->getOperand(2);
10744  SDValue Cond = N->getOperand(3);
10745 
10746  // If Carry is false, fold to a regular SETCC.
10747  if (isNullConstant(Carry))
10748  return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10749 
10750  return SDValue();
10751 }
10752 
10753 /// Check if N satisfies:
10754 /// N is used once.
10755 /// N is a Load.
10756 /// The load is compatible with ExtOpcode. It means
10757 /// If load has explicit zero/sign extension, ExpOpcode must have the same
10758 /// extension.
10759 /// Otherwise returns true.
10760 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10761  if (!N.hasOneUse())
10762  return false;
10763 
10764  if (!isa<LoadSDNode>(N))
10765  return false;
10766 
10767  LoadSDNode *Load = cast<LoadSDNode>(N);
10768  ISD::LoadExtType LoadExt = Load->getExtensionType();
10769  if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10770  return true;
10771 
10772  // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10773  // extension.
10774  if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10775  (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10776  return false;
10777 
10778  return true;
10779 }
10780 
10781 /// Fold
10782 /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10783 /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10784 /// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10785 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10786 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
// NOTE(review): line 10787 (the start of the signature — upstream:
// static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering
// &TLI, ...)) is missing from this excerpt; confirm against the original.
10788  SelectionDAG &DAG) {
10789  unsigned Opcode = N->getOpcode();
10790  SDValue N0 = N->getOperand(0);
10791  EVT VT = N->getValueType(0);
10792  SDLoc DL(N);
10793 
10794  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10795  Opcode == ISD::ANY_EXTEND) &&
10796  "Expected EXTEND dag node in input!");
10797 
// Only handle a single-use (v)select whose both arms are loads compatible
// with the extension kind (checked via isCompatibleLoad below).
10798  if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10799  !N0.hasOneUse())
10800  return SDValue();
10801 
10802  SDValue Op1 = N0->getOperand(1);
10803  SDValue Op2 = N0->getOperand(2);
10804  if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10805  return SDValue();
10806 
// Map the extend opcode to the matching extending-load flavor.
10807  auto ExtLoadOpcode = ISD::EXTLOAD;
10808  if (Opcode == ISD::SIGN_EXTEND)
10809  ExtLoadOpcode = ISD::SEXTLOAD;
10810  else if (Opcode == ISD::ZERO_EXTEND)
10811  ExtLoadOpcode = ISD::ZEXTLOAD;
10812 
// Both extending loads must be legal at the wide type for the fold to win.
10813  LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10814  LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10815  if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10816  !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10817  return SDValue();
10818 
// Hoist the extension into each arm and rebuild the select at the wide type.
10819  SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10820  SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10821  return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10822 }
10823 
10824 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10825 /// a build_vector of constants.
10826 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10827 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10828 /// Vector extends are not folded if operations are legal; this is to
10829 /// avoid introducing illegal build_vector dag nodes.
// NOTE(review): line 10830 (the start of the signature — upstream:
// static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering
// &TLI, ...)) is missing from this excerpt; confirm against the original.
10831  SelectionDAG &DAG, bool LegalTypes) {
10832  unsigned Opcode = N->getOpcode();
10833  SDValue N0 = N->getOperand(0);
10834  EVT VT = N->getValueType(0);
10835  SDLoc DL(N);
10836 
10837  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10838  Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
// NOTE(review): line 10839 (the final opcode alternative of this assert —
// upstream: Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) is missing from this
// excerpt.
10840  && "Expected EXTEND dag node in input!");
10841 
10842  // fold (sext c1) -> c1
10843  // fold (zext c1) -> c1
10844  // fold (aext c1) -> c1
10845  if (isa<ConstantSDNode>(N0))
10846  return DAG.getNode(Opcode, DL, VT, N0);
10847 
10848  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10849  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10850  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10851  if (N0->getOpcode() == ISD::SELECT) {
10852  SDValue Op1 = N0->getOperand(1);
10853  SDValue Op2 = N0->getOperand(2);
10854  if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10855  (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10856  // For any_extend, choose sign extension of the constants to allow a
10857  // possible further transform to sign_extend_inreg.i.e.
10858  //
10859  // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10860  // t2: i64 = any_extend t1
10861  // -->
10862  // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10863  // -->
10864  // t4: i64 = sign_extend_inreg t3
10865  unsigned FoldOpc = Opcode;
10866  if (FoldOpc == ISD::ANY_EXTEND)
10867  FoldOpc = ISD::SIGN_EXTEND;
10868  return DAG.getSelect(DL, VT, N0->getOperand(0),
10869  DAG.getNode(FoldOpc, DL, VT, Op1),
10870  DAG.getNode(FoldOpc, DL, VT, Op2));
10871  }
10872  }
10873 
10874  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
10875  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
10876  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
10877  EVT SVT = VT.getScalarType();
10878  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
// NOTE(review): line 10879 (the remainder of this guard — upstream it
// requires N0 to be a build_vector of ConstantSDNodes) is missing from this
// excerpt.
10880  return SDValue();
10881 
10882  // We can fold this node into a build_vector.
10883  unsigned VTBits = SVT.getSizeInBits();
10884  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
// NOTE(review): line 10885 (upstream declares the result element container,
// SmallVector<SDValue, 8> Elts;) is missing from this excerpt.
10886  unsigned NumElts = VT.getVectorNumElements();
10887 
10888  // For zero-extensions, UNDEF elements still guarantee to have the upper
10889  // bits set to zero.
10890  bool IsZext =
10891  Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10892 
10893  for (unsigned i = 0; i != NumElts; ++i) {
10894  SDValue Op = N0.getOperand(i);
10895  if (Op.isUndef()) {
10896  Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10897  continue;
10898  }
10899 
10900  SDLoc DL(Op);
10901  // Get the constant value and if needed trunc it to the size of the type.
10902  // Nodes like build_vector might have constants wider than the scalar type.
10903  APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10904  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10905  Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10906  else
10907  Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10908  }
10909 
10910  return DAG.getBuildVector(VT, DL, Elts);
10911 }
10912 
10913 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
10914 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10915 // transformation. Returns true if extension are possible and the above
10916 // mentioned transformation is profitable.
// NOTE(review): line 10917 — the signature line naming this function
// (presumably "static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N,
// SDValue N0," per the call sites below — TODO confirm) is missing from
// this extraction. SETCC users that can also be extended are collected into
// ExtendNodes for the caller to rewrite via ExtendSetCCUses.
10918  unsigned ExtOpc,
10919  SmallVectorImpl<SDNode *> &ExtendNodes,
10920  const TargetLowering &TLI) {
10921  bool HasCopyToRegUses = false;
10922  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
 // Walk every use of the value N0; each use must be either extendable
 // (a SETCC against constants) or tolerable via a free truncate.
10923  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10924  UE = N0.getNode()->use_end();
10925  UI != UE; ++UI) {
10926  SDNode *User = *UI;
10927  if (User == N)
10928  continue;
10929  if (UI.getUse().getResNo() != N0.getResNo())
10930  continue;
10931  // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10932  if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10933  ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10934  if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10935  // Sign bits will be lost after a zext.
10936  return false;
10937  bool Add = false;
10938  for (unsigned i = 0; i != 2; ++i) {
10939  SDValue UseOp = User->getOperand(i);
10940  if (UseOp == N0)
10941  continue;
10942  if (!isa<ConstantSDNode>(UseOp))
10943  return false;
10944  Add = true;
10945  }
10946  if (Add)
10947  ExtendNodes.push_back(User);
10948  continue;
10949  }
10950  // If truncates aren't free and there are users we can't
10951  // extend, it isn't worthwhile.
10952  if (!isTruncFree)
10953  return false;
10954  // Remember if this value is live-out.
10955  if (User->getOpcode() == ISD::CopyToReg)
10956  HasCopyToRegUses = true;
10957  }
10958 
10959  if (HasCopyToRegUses) {
10960  bool BothLiveOut = false;
10961  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10962  UI != UE; ++UI) {
10963  SDUse &Use = UI.getUse();
10964  if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10965  BothLiveOut = true;
10966  break;
10967  }
10968  }
10969  if (BothLiveOut)
10970  // Both unextended and extended values are live out. There had better be
10971  // a good reason for the transformation.
 // Profitable only if we found SETCCs to extend (non-empty list -> true).
10972  return ExtendNodes.size();
10973  }
10974  return true;
10975 }
10976 
// Rewrite each collected SETCC user (gathered by ExtendUsesToFormExtLoad) so
// its operands use the new extended load: the original narrow load operand is
// replaced by ExtLoad, and constant operands are re-extended with ExtType.
10977 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10978  SDValue OrigLoad, SDValue ExtLoad,
10979  ISD::NodeType ExtType) {
10980  // Extend SetCC uses if necessary.
10981  SDLoc DL(ExtLoad);
10982  for (SDNode *SetCC : SetCCs) {
// NOTE(review): line 10983 (presumably the declaration of the Ops operand
// vector used below — TODO confirm) is missing from this extraction.
10984 
10985  for (unsigned j = 0; j != 2; ++j) {
10986  SDValue SOp = SetCC->getOperand(j);
10987  if (SOp == OrigLoad)
10988  Ops.push_back(ExtLoad);
10989  else
10990  Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10991  }
10992 
 // Keep the original condition code operand unchanged.
10993  Ops.push_back(SetCC->getOperand(2));
10994  CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10995  }
10996 }
10997 
10998 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
// Split (sext/zext (load x)) of an illegal-but-splittable vector type into a
// concat of smaller legal extloads; see the worked example in the comments
// below. Returns SDValue(N, 0) on success so N is not immediately recombined.
10999 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
11000  SDValue N0 = N->getOperand(0);
11001  EVT DstVT = N->getValueType(0);
11002  EVT SrcVT = N0.getValueType();
11003 
11004  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11005  N->getOpcode() == ISD::ZERO_EXTEND) &&
11006  "Unexpected node type (not an extend)!");
11007 
11008  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
11009  // For example, on a target with legal v4i32, but illegal v8i32, turn:
11010  // (v8i32 (sext (v8i16 (load x))))
11011  // into:
11012  // (v8i32 (concat_vectors (v4i32 (sextload x)),
11013  // (v4i32 (sextload (x + 16)))))
11014  // Where uses of the original load, i.e.:
11015  // (v8i16 (load x))
11016  // are replaced with:
11017  // (v8i16 (truncate
11018  // (v8i32 (concat_vectors (v4i32 (sextload x)),
11019  // (v4i32 (sextload (x + 16)))))))
11020  //
11021  // This combine is only applicable to illegal, but splittable, vectors.
11022  // All legal types, and illegal non-vector types, are handled elsewhere.
11023  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
11024  //
11025  if (N0->getOpcode() != ISD::LOAD)
11026  return SDValue();
11027 
11028  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11029 
 // Only plain (non-extending), unindexed, simple, single-use loads of
 // power-of-two vector types qualify.
11030  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
11031  !N0.hasOneUse() || !LN0->isSimple() ||
11032  !DstVT.isVector() || !DstVT.isPow2VectorType() ||
// NOTE(review): line 11033 (the last conjunct of this guard — presumably the
// TLI.isVectorLoadExtDesirable check referenced in the comment at 11023 —
// TODO confirm) is missing from this extraction.
11034  return SDValue();
11035 
11036  SmallVector<SDNode *, 4> SetCCs;
11037  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
11038  return SDValue();
11039 
11040  ISD::LoadExtType ExtType =
11041  N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11042 
11043  // Try to split the vector types to get down to legal types.
11044  EVT SplitSrcVT = SrcVT;
11045  EVT SplitDstVT = DstVT;
 // Halve the types until the target supports the extload, or we run out of
 // elements to split.
11046  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
11047  SplitSrcVT.getVectorNumElements() > 1) {
11048  SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
11049  SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
11050  }
11051 
11052  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
11053  return SDValue();
11054 
11055  assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
11056 
11057  SDLoc DL(N);
11058  const unsigned NumSplits =
11059  DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
11060  const unsigned Stride = SplitSrcVT.getStoreSize();
// NOTE(review): line 11061 (presumably the declaration of the Loads
// SmallVector used below — TODO confirm) is missing from this extraction.
11062  SmallVector<SDValue, 4> Chains;
11063 
 // Emit one extload per split, advancing the pointer by the in-memory size
 // of each split piece.
11064  SDValue BasePtr = LN0->getBasePtr();
11065  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
11066  const unsigned Offset = Idx * Stride;
11067  const Align Align = commonAlignment(LN0->getAlign(), Offset);
11068 
11069  SDValue SplitLoad = DAG.getExtLoad(
11070  ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
11071  LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
11072  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11073 
11074  BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
11075 
11076  Loads.push_back(SplitLoad.getValue(0));
11077  Chains.push_back(SplitLoad.getValue(1));
11078  }
11079 
11080  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11081  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
11082 
11083  // Simplify TF.
11084  AddToWorklist(NewChain.getNode());
11085 
11086  CombineTo(N, NewValue);
11087 
11088  // Replace uses of the original load (before extension)
11089  // with a truncate of the concatenated sextloaded vectors.
11090  SDValue Trunc =
11091  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
11092  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
11093  CombineTo(N0.getNode(), Trunc, NewChain);
11094  return SDValue(N, 0); // Return N so it doesn't get rechecked!
11095 }
11096 
11097 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11098 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
// Matches the three-level pattern zext(logic-op(shift(load))) and rebuilds it
// around a single zextload, so the logic/shift happen in the wide type.
11099 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
11100  assert(N->getOpcode() == ISD::ZERO_EXTEND);
11101  EVT VT = N->getValueType(0);
11102  EVT OrigVT = N->getOperand(0).getValueType();
 // If the zext is free anyway, rewriting the chain buys nothing.
11103  if (TLI.isZExtFree(OrigVT, VT))
11104  return SDValue();
11105 
11106  // and/or/xor
11107  SDValue N0 = N->getOperand(0);
11108  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11109  N0.getOpcode() == ISD::XOR) ||
11110  N0.getOperand(1).getOpcode() != ISD::Constant ||
11111  (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
11112  return SDValue();
11113 
11114  // shl/shr
11115  SDValue N1 = N0->getOperand(0);
11116  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11117  N1.getOperand(1).getOpcode() != ISD::Constant ||
11118  (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
11119  return SDValue();
11120 
11121  // load
11122  if (!isa<LoadSDNode>(N1.getOperand(0)))
11123  return SDValue();
11124  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11125  EVT MemVT = Load->getMemoryVT();
 // Require a legal zextload; a sign-extending or indexed load can't be
 // rewritten this way.
11126  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11127  Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
11128  return SDValue();
11129 
11130 
11131  // If the shift op is SHL, the logic op must be AND, otherwise the result
11132  // will be wrong.
11133  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11134  return SDValue();
11135 
11136  if (!N0.hasOneUse() || !N1.hasOneUse())
11137  return SDValue();
11138 
11139  SmallVector<SDNode*, 4> SetCCs;
11140  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11141  ISD::ZERO_EXTEND, SetCCs, TLI))
11142  return SDValue();
11143 
11144  // Actually do the transformation.
11145  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11146  Load->getChain(), Load->getBasePtr(),
11147  Load->getMemoryVT(), Load->getMemOperand());
11148 
11149  SDLoc DL1(N1);
11150  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
11151  N1.getOperand(1));
11152 
// NOTE(review): line 11153 (presumably the computation of Mask — the logic
// op's constant operand widened to VT, per upstream LLVM — TODO confirm) is
// missing from this extraction.
11154  SDLoc DL0(N0);
11155  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11156  DAG.getConstant(Mask, DL0, VT));
11157 
11158  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11159  CombineTo(N, And);
 // If the load result had only this one use, just rewire its chain; otherwise
 // other users still need the narrow value, so give them a truncate.
11160  if (SDValue(Load, 0).hasOneUse()) {
11161  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11162  } else {
11163  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11164  Load->getValueType(0), ExtLoad);
11165  CombineTo(Load, Trunc, ExtLoad.getValue(1));
11166  }
11167 
11168  // N0 is dead at this point.
11169  recursivelyDeleteUnusedNodes(N0.getNode());
11170 
11171  return SDValue(N,0); // Return N so it doesn't get rechecked!
11172 }
11173 
11174 /// If we're narrowing or widening the result of a vector select and the final
11175 /// size is the same size as a setcc (compare) feeding the select, then try to
11176 /// apply the cast operation to the select's operands because matching vector
11177 /// sizes for a select condition and other operands should be more efficient.
11178 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11179  unsigned CastOpcode = Cast->getOpcode();
11180  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
11181  CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
11182  CastOpcode == ISD::FP_ROUND) &&
11183  "Unexpected opcode for vector select narrowing/widening");
11184 
11185  // We only do this transform before legal ops because the pattern may be
11186  // obfuscated by target-specific operations after legalization. Do not create
11187  // an illegal select op, however, because that may be difficult to lower.
11188  EVT VT = Cast->getValueType(0);
11189  if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
11190  return SDValue();
11191 
11192  SDValue VSel = Cast->getOperand(0);
11193  if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11194  VSel.getOperand(0).getOpcode() != ISD::SETCC)
11195  return SDValue();
11196 
11197  // Does the setcc have the same vector size as the casted select?
11198  SDValue SetCC = VSel.getOperand(0);
11199  EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11200  if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11201  return SDValue();
11202 
11203  // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11204  SDValue A = VSel.getOperand(1);
11205  SDValue B = VSel.getOperand(2);
11206  SDValue CastA, CastB;
11207  SDLoc DL(Cast);
11208  if (CastOpcode == ISD::FP_ROUND) {
11209  // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11210  CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11211  CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11212  } else {
11213  CastA = DAG.getNode(CastOpcode, DL, VT, A);
11214  CastB = DAG.getNode(CastOpcode, DL, VT, B);
11215  }
11216  return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11217 }
11218 
11219 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11220 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// NOTE(review): line 11221 — the signature line naming this function
// (presumably "static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG,
// DAGCombiner &Combiner," per the call in visitSIGN_EXTEND — TODO confirm)
// is missing from this extraction.
11222  const TargetLowering &TLI, EVT VT,
11223  bool LegalOperations, SDNode *N,
11224  SDValue N0, ISD::LoadExtType ExtLoadType) {
11225  SDNode *N0Node = N0.getNode();
 // Accept either a matching-kind extload or an any-extload; unindexed and
 // single-use only.
11226  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
11227  : ISD::isZEXTLoad(N0Node);
11228  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11229  !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
11230  return SDValue();
11231 
11232  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11233  EVT MemVT = LN0->getMemoryVT();
 // After legalization (or for vectors / non-simple loads) the wide extload
 // must be legal for the target.
11234  if ((LegalOperations || !LN0->isSimple() ||
11235  VT.isVector()) &&
11236  !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
11237  return SDValue();
11238 
 // Build the wider extload and splice it in for both the extend node and the
 // original load's chain result.
11239  SDValue ExtLoad =
11240  DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11241  LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11242  Combiner.CombineTo(N, ExtLoad);
11243  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11244  if (LN0->use_empty())
11245  Combiner.recursivelyDeleteUnusedNodes(LN0);
11246  return SDValue(N, 0); // Return N so it doesn't get rechecked!
11247 }
11248 
11249 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11250 // Only generate vector extloads when 1) they're legal, and 2) they are
11251 // deemed desirable by the target.
// NOTE(review): line 11252 — the signature line naming this function
// (presumably "static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG,
// DAGCombiner &Combiner," per the call in visitSIGN_EXTEND — TODO confirm)
// is missing from this extraction.
11253  const TargetLowering &TLI, EVT VT,
11254  bool LegalOperations, SDNode *N, SDValue N0,
11255  ISD::LoadExtType ExtLoadType,
11256  ISD::NodeType ExtOpc) {
 // Only plain, unindexed loads; and the target must support the extload when
 // we are past legalization, the type is a vector, or the load isn't simple.
11257  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
11258  !ISD::isUNINDEXEDLoad(N0.getNode()) ||
11259  ((LegalOperations || VT.isVector() ||
11260  !cast<LoadSDNode>(N0)->isSimple()) &&
11261  !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11262  return {};
11263 
11264  bool DoXform = true;
11265  SmallVector<SDNode *, 4> SetCCs;
 // Multiple users: only profitable if the extra uses can be extended too.
11266  if (!N0.hasOneUse())
11267  DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11268  if (VT.isVector())
11269  DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
11270  if (!DoXform)
11271  return {};
11272 
11273  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11274  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11275  LN0->getBasePtr(), N0.getValueType(),
11276  LN0->getMemOperand());
11277  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11278  // If the load value is used only by N, replace it via CombineTo N.
11279  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11280  Combiner.CombineTo(N, ExtLoad);
11281  if (NoReplaceTrunc) {
11282  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11283  Combiner.recursivelyDeleteUnusedNodes(LN0);
11284  } else {
 // Other users of the narrow load get a truncate of the new wide value.
11285  SDValue Trunc =
11286  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11287  Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11288  }
11289  return SDValue(N, 0); // Return N so it doesn't get rechecked!
11290 }
11291 
// Fold an extend of a non-extending masked load into an extending masked
// load, extending the pass-through value to match.
// NOTE(review): line 11292 — the signature line naming this function
// (presumably "static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,"
// per the call in visitSIGN_EXTEND — TODO confirm) is missing from this
// extraction.
11293  const TargetLowering &TLI, EVT VT,
11294  SDNode *N, SDValue N0,
11295  ISD::LoadExtType ExtLoadType,
11296  ISD::NodeType ExtOpc) {
11297  if (!N0.hasOneUse())
11298  return SDValue();
11299 
11300  MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
11301  if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11302  return SDValue();
11303 
 // The extending masked load must be supported, and deemed worthwhile.
11304  if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11305  return SDValue();
11306 
11307  if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11308  return SDValue();
11309 
11310  SDLoc dl(Ld);
 // The pass-through value must be widened the same way as the loaded lanes.
11311  SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11312  SDValue NewLoad = DAG.getMaskedLoad(
11313  VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11314  PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11315  ExtLoadType, Ld->isExpandingLoad());
11316  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
11317  return NewLoad;
11318 }
11319 
// Fold [s|z]ext(setgt X, -1) — a sign-bit test — into a not+shift sequence,
// avoiding the setcc/select materialization entirely.
// NOTE(review): line 11320 — the signature line naming this function
// (presumably "static SDValue foldExtendedSignBitTest(SDNode *N,
// SelectionDAG &DAG," per the call in visitSIGN_EXTEND — TODO confirm) is
// missing from this extraction.
11321  bool LegalOperations) {
11322  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11323  N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11324 
 // Only a pre-legalization, single-use i1 setcc qualifies.
11325  SDValue SetCC = N->getOperand(0);
11326  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11327  !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11328  return SDValue();
11329 
11330  SDValue X = SetCC.getOperand(0);
11331  SDValue Ones = SetCC.getOperand(1);
11332  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11333  EVT VT = N->getValueType(0);
11334  EVT XVT = X.getValueType();
11335  // setge X, C is canonicalized to setgt, so we do not need to match that
11336  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11337  // not require the 'not' op.
11338  if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11339  // Invert and smear/shift the sign bit:
11340  // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11341  // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
11342  SDLoc DL(N);
11343  unsigned ShCt = VT.getSizeInBits() - 1;
11344  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 // Respect targets that consider large shift amounts unprofitable.
11345  if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
11346  SDValue NotX = DAG.getNOT(DL, X, VT);
11347  SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
 // sext smears the (inverted) sign bit with sra; zext isolates it with srl.
11348  auto ShiftOpcode =
11349  N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
11350  return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
11351  }
11352  }
11353  return SDValue();
11354 }
11355 
// Combine (sext (setcc ...)): either fold the compare directly into a
// same-size vector setcc, widen the compare operands, or lower to
// select(setcc, T, 0).
11356 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
11357  SDValue N0 = N->getOperand(0);
11358  if (N0.getOpcode() != ISD::SETCC)
11359  return SDValue();
11360 
11361  SDValue N00 = N0.getOperand(0);
11362  SDValue N01 = N0.getOperand(1);
11363  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11364  EVT VT = N->getValueType(0);
11365  EVT N00VT = N00.getValueType();
11366  SDLoc DL(N);
11367 
11368  // On some architectures (such as SSE/NEON/etc) the SETCC result type is
11369  // the same size as the compared operands. Try to optimize sext(setcc())
11370  // if this is the case.
11371  if (VT.isVector() && !LegalOperations &&
11372  TLI.getBooleanContents(N00VT) ==
// NOTE(review): line 11373 (the boolean-contents value being compared
// against — presumably ZeroOrNegativeOneBooleanContent, matching the sext
// semantics — TODO confirm) is missing from this extraction.
11374  EVT SVT = getSetCCResultType(N00VT);
11375 
11376  // If we already have the desired type, don't change it.
11377  if (SVT != N0.getValueType()) {
11378  // We know that the # elements of the results is the same as the
11379  // # elements of the compare (and the # elements of the compare result
11380  // for that matter). Check to see that they are the same size. If so,
11381  // we know that the element size of the sext'd result matches the
11382  // element size of the compare operands.
11383  if (VT.getSizeInBits() == SVT.getSizeInBits())
11384  return DAG.getSetCC(DL, VT, N00, N01, CC);
11385 
11386  // If the desired elements are smaller or larger than the source
11387  // elements, we can use a matching integer vector type and then
11388  // truncate/sign extend.
11389  EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
11390  if (SVT == MatchingVecType) {
11391  SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
11392  return DAG.getSExtOrTrunc(VsetCC, DL, VT);
11393  }
11394  }
11395 
11396  // Try to eliminate the sext of a setcc by zexting the compare operands.
11397  if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
11398  !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
 // Signed compares need sign-preserving extensions of the operands.
11399  bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
11400  unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11401  unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11402 
11403  // We have an unsupported narrow vector compare op that would be legal
11404  // if extended to the destination type. See if the compare operands
11405  // can be freely extended to the destination type.
11406  auto IsFreeToExtend = [&](SDValue V) {
11407  if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
11408  return true;
11409  // Match a simple, non-extended load that can be converted to a
11410  // legal {z/s}ext-load.
11411  // TODO: Allow widening of an existing {z/s}ext-load?
11412  if (!(ISD::isNON_EXTLoad(V.getNode()) &&
// NOTE(review): line 11413 (another conjunct of this load check —
// presumably an unindexed-load test — TODO confirm) is missing from this
// extraction.
11414  cast<LoadSDNode>(V)->isSimple() &&
11415  TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
11416  return false;
11417 
11418  // Non-chain users of this value must either be the setcc in this
11419  // sequence or extends that can be folded into the new {z/s}ext-load.
11420  for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
11421  UI != UE; ++UI) {
11422  // Skip uses of the chain and the setcc.
11423  SDNode *User = *UI;
11424  if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11425  continue;
11426  // Extra users must have exactly the same cast we are about to create.
11427  // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11428  // is enhanced similarly.
11429  if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11430  return false;
11431  }
11432  return true;
11433  };
11434 
11435  if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11436  SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11437  SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11438  return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11439  }
11440  }
11441  }
11442 
11443  // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11444  // Here, T can be 1 or -1, depending on the type of the setcc and
11445  // getBooleanContents().
11446  unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11447 
11448  // To determine the "true" side of the select, we need to know the high bit
11449  // of the value returned by the setcc if it evaluates to true.
11450  // If the type of the setcc is i1, then the true case of the select is just
11451  // sext(i1 1), that is, -1.
11452  // If the type of the setcc is larger (say, i8) then the value of the high
11453  // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11454  // of the appropriate width.
11455  SDValue ExtTrueVal = (SetCCWidth == 1)
11456  ? DAG.getAllOnesConstant(DL, VT)
11457  : DAG.getBoolConstant(true, DL, VT, N00VT)
11458  SDValue Zero = DAG.getConstant(0, DL, VT);
11459  if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11460  return SCC;
11461 
11462  if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11463  EVT SetCCVT = getSetCCResultType(N00VT);
11464  // Don't do this transform for i1 because there's a select transform
11465  // that would reverse it.
11466  // TODO: We should not do this transform at all without a target hook
11467  // because a sext is likely cheaper than a select?
11468  if (SetCCVT.getScalarSizeInBits() != 1 &&
11469  (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11470  SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11471  return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11472  }
11473  }
11474 
11475  return SDValue();
11476 }
11477 
// Main combine entry point for ISD::SIGN_EXTEND nodes: tries constant
// folding, extend-of-extend collapsing, trunc/sext elimination, load folding,
// setcc folding, and several algebraic rewrites, in that order.
11478 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11479  SDValue N0 = N->getOperand(0);
11480  EVT VT = N->getValueType(0);
11481  SDLoc DL(N);
11482 
11483  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11484  return Res;
11485 
11486  // fold (sext (sext x)) -> (sext x)
11487  // fold (sext (aext x)) -> (sext x)
11488  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11489  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11490 
11491  if (N0.getOpcode() == ISD::TRUNCATE) {
11492  // fold (sext (truncate (load x))) -> (sext (smaller load x))
11493  // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11494  if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11495  SDNode *oye = N0.getOperand(0).getNode();
11496  if (NarrowLoad.getNode() != N0.getNode()) {
11497  CombineTo(N0.getNode(), NarrowLoad);
11498  // CombineTo deleted the truncate, if needed, but not what's under it.
11499  AddToWorklist(oye);
11500  }
11501  return SDValue(N, 0); // Return N so it doesn't get rechecked!
11502  }
11503 
11504  // See if the value being truncated is already sign extended. If so, just
11505  // eliminate the trunc/sext pair.
11506  SDValue Op = N0.getOperand(0);
11507  unsigned OpBits = Op.getScalarValueSizeInBits();
11508  unsigned MidBits = N0.getScalarValueSizeInBits();
11509  unsigned DestBits = VT.getScalarSizeInBits();
11510  unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11511 
11512  if (OpBits == DestBits) {
11513  // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
11514  // bits, it is already ready.
11515  if (NumSignBits > DestBits-MidBits)
11516  return Op;
11517  } else if (OpBits < DestBits) {
11518  // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
11519  // bits, just sext from i32.
11520  if (NumSignBits > OpBits-MidBits)
11521  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11522  } else {
11523  // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
11524  // bits, just truncate to i32.
11525  if (NumSignBits > OpBits-MidBits)
11526  return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11527  }
11528 
11529  // fold (sext (truncate x)) -> (sextinreg x).
11530  if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11531  N0.getValueType())) {
 // Bring Op to the destination width first, then sign-extend in register
 // from the truncated width.
11532  if (OpBits < DestBits)
11533  Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11534  else if (OpBits > DestBits)
11535  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11536  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11537  DAG.getValueType(N0.getValueType()));
11538  }
11539  }
11540 
11541  // Try to simplify (sext (load x)).
11542  if (SDValue foldedExt =
11543  tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
// NOTE(review): line 11544 (the trailing arguments of this call —
// presumably ISD::SEXTLOAD, ISD::SIGN_EXTEND — TODO confirm) is missing
// from this extraction.
11545  return foldedExt;
11546 
11547  if (SDValue foldedExt =
11548  tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
// NOTE(review): line 11549 (the trailing argument of this call — presumably
// ISD::SIGN_EXTEND — TODO confirm) is missing from this extraction.
11550  return foldedExt;
11551 
11552  // fold (sext (load x)) to multiple smaller sextloads.
11553  // Only on illegal but splittable vectors.
11554  if (SDValue ExtLoad = CombineExtLoad(N))
11555  return ExtLoad;
11556 
11557  // Try to simplify (sext (sextload x)).
11558  if (SDValue foldedExt = tryToFoldExtOfExtload(
11559  DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11560  return foldedExt;
11561 
11562  // fold (sext (and/or/xor (load x), cst)) ->
11563  // (and/or/xor (sextload x), (sext cst))
11564  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11565  N0.getOpcode() == ISD::XOR) &&
11566  isa<LoadSDNode>(N0.getOperand(0)) &&
11567  N0.getOperand(1).getOpcode() == ISD::Constant &&
11568  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11569  LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11570  EVT MemVT = LN00->getMemoryVT();
11571  if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11572  LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11573  SmallVector<SDNode*, 4> SetCCs;
11574  bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11575  ISD::SIGN_EXTEND, SetCCs, TLI);
11576  if (DoXform) {
11577  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11578  LN00->getChain(), LN00->getBasePtr(),
11579  LN00->getMemoryVT(),
11580  LN00->getMemOperand());
// NOTE(review): line 11581 (presumably the computation of Mask — the logic
// op's constant operand sign-extended to VT — TODO confirm) is missing from
// this extraction.
11582  SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11583  ExtLoad, DAG.getConstant(Mask, DL, VT));
11584  ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11585  bool NoReplaceTruncAnd = !N0.hasOneUse();
11586  bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11587  CombineTo(N, And);
11588  // If N0 has multiple uses, change other uses as well.
11589  if (NoReplaceTruncAnd) {
11590  SDValue TruncAnd =
11591  DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11592  CombineTo(N0.getNode(), TruncAnd);
11593  }
11594  if (NoReplaceTrunc) {
11595  DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11596  } else {
11597  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11598  LN00->getValueType(0), ExtLoad);
11599  CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11600  }
11601  return SDValue(N,0); // Return N so it doesn't get rechecked!
11602  }
11603  }
11604  }
11605 
11606  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11607  return V;
11608 
11609  if (SDValue V = foldSextSetcc(N))
11610  return V;
11611 
11612  // fold (sext x) -> (zext x) if the sign bit is known zero.
11613  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11614  DAG.SignBitIsZero(N0))
11615  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11616 
11617  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11618  return NewVSel;
11619 
11620  // Eliminate this sign extend by doing a negation in the destination type:
11621  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11622  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11623  isNullOrNullSplat(N0.getOperand(0)) &&
11624  N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
// NOTE(review): line 11625 (the final conjunct of this guard — presumably a
// legality check for ISD::SUB in VT — TODO confirm) is missing from this
// extraction.
11626  SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11627  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11628  }
11629  // Eliminate this sign extend by doing a decrement in the destination type:
11630  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11631  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
// NOTE(review): line 11632 (a conjunct of this guard — presumably an
// all-ones check on N0's second operand, matching the "+ (-1)" pattern —
// TODO confirm) is missing from this extraction.
11633  N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
// NOTE(review): line 11634 (the final conjunct of this guard — presumably a
// legality check for ISD::ADD in VT — TODO confirm) is missing from this
// extraction.
11635  SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11636  return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11637  }
11638 
11639  // fold sext (not i1 X) -> add (zext i1 X), -1
11640  // TODO: This could be extended to handle bool vectors.
11641  if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11642  (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11643  TLI.isOperationLegal(ISD::ADD, VT)))) {
11644  // If we can eliminate the 'not', the sext form should be better
11645  if (SDValue NewXor = visitXOR(N0.getNode())) {
11646  // Returning N0 is a form of in-visit replacement that may have
11647  // invalidated N0.
11648  if (NewXor.getNode() == N0.getNode()) {
11649  // Return SDValue here as the xor should have already been replaced in
11650  // this sext.
11651  return SDValue();
11652  } else {
11653  // Return a new sext with the new xor.
11654  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11655  }
11656  }
11657 
11658  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11659  return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11660  }
11661 
11662  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11663  return Res;
11664 
11665  return SDValue();
11666 }
11667 
11668 // isTruncateOf - If N is a truncate of some other value, return true, record
11669 // the value being truncated in Op and which of Op's bits are zero/one in Known.
11670 // This function computes KnownBits to avoid a duplicated call to
11671 // computeKnownBits in the caller.
11673  KnownBits &Known) {
11674  if (N->getOpcode() == ISD::TRUNCATE) {
11675  Op = N->getOperand(0);
11676  Known = DAG.computeKnownBits(Op);
11677  return true;
11678  }
11679 
11680  if (N.getOpcode() != ISD::SETCC ||
11681  N.getValueType().getScalarType() != MVT::i1 ||
11682  cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11683  return false;
11684 
11685  SDValue Op0 = N->getOperand(0);
11686  SDValue Op1 = N->getOperand(1);
11687  assert(Op0.getValueType() == Op1.getValueType());
11688 
11689  if (isNullOrNullSplat(Op0))
11690  Op = Op1;
11691  else if (isNullOrNullSplat(Op1))
11692  Op = Op0;
11693  else
11694  return false;
11695 
11696  Known = DAG.computeKnownBits(Op);
11697 
11698  return (Known.Zero | 1).isAllOnes();
11699 }
11700 
11701 /// Given an extending node with a pop-count operand, if the target does not
11702 /// support a pop-count in the narrow source type but does support it in the
11703 /// destination type, widen the pop-count to the destination type.
11704 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11705  assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11706  Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11707 
11708  SDValue CtPop = Extend->getOperand(0);
11709  if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11710  return SDValue();
11711 
11712  EVT VT = Extend->getValueType(0);
11713  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11714  if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11716  return SDValue();
11717 
11718  // zext (ctpop X) --> ctpop (zext X)
11719  SDLoc DL(Extend);
11720  SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11721  return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11722 }
11723 
11724 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11725  SDValue N0 = N->getOperand(0);
11726  EVT VT = N->getValueType(0);
11727 
11728  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11729  return Res;
11730 
11731  // fold (zext (zext x)) -> (zext x)
11732  // fold (zext (aext x)) -> (zext x)
11733  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11734  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11735  N0.getOperand(0));
11736 
11737  // fold (zext (truncate x)) -> (zext x) or
11738  // (zext (truncate x)) -> (truncate x)
11739  // This is valid when the truncated bits of x are already zero.
11740  SDValue Op;
11741  KnownBits Known;
11742  if (isTruncateOf(DAG, N0, Op, Known)) {
11743  APInt TruncatedBits =
11744  (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11745  APInt(Op.getScalarValueSizeInBits(), 0) :
11746  APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11748  std::min(Op.getScalarValueSizeInBits(),
11749  VT.getScalarSizeInBits()));
11750  if (TruncatedBits.isSubsetOf(Known.Zero))
11751  return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11752  }
11753 
11754  // fold (zext (truncate x)) -> (and x, mask)
11755  if (N0.getOpcode() == ISD::TRUNCATE) {
11756  // fold (zext (truncate (load x))) -> (zext (smaller load x))
11757  // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11758  if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11759  SDNode *oye = N0.getOperand(0).getNode();
11760  if (NarrowLoad.getNode() != N0.getNode()) {
11761  CombineTo(N0.getNode(), NarrowLoad);
11762  // CombineTo deleted the truncate, if needed, but not what's under it.
11763  AddToWorklist(oye);
11764  }
11765  return SDValue(N, 0); // Return N so it doesn't get rechecked!
11766  }
11767 
11768  EVT SrcVT = N0.getOperand(0).getValueType();
11769  EVT MinVT = N0.getValueType();
11770 
11771  // Try to mask before the extension to avoid having to generate a larger mask,
11772  // possibly over several sub-vectors.
11773  if (SrcVT.bitsLT(VT) && VT.isVector()) {
11774  if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11775  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11776  SDValue Op = N0.getOperand(0);
11777  Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11778  AddToWorklist(Op.getNode());
11779  SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11780  // Transfer the debug info; the new node is equivalent to N0.
11781  DAG.transferDbgValues(N0, ZExtOrTrunc);
11782  return ZExtOrTrunc;
11783  }
11784  }
11785 
11786  if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11787  SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11788  AddToWorklist(Op.getNode());
11789  SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11790  // We may safely transfer the debug info describing the truncate node over
11791  // to the equivalent and operation.
11792  DAG.transferDbgValues(N0, And);
11793  return And;
11794  }
11795  }
11796 
11797  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11798  // if either of the casts is not free.
11799  if (N0.getOpcode() == ISD::AND &&
11800  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11801  N0.getOperand(1).getOpcode() == ISD::Constant &&
11803  N0.getValueType()) ||
11804  !TLI.isZExtFree(N0.getValueType(), VT))) {
11805  SDValue X = N0.getOperand(0).getOperand(0);
11806  X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11808  SDLoc DL(N);
11809  return DAG.getNode(ISD::AND, DL, VT,
11810  X, DAG.getConstant(Mask, DL, VT));
11811  }
11812 
11813  // Try to simplify (zext (load x)).
11814  if (SDValue foldedExt =
11815  tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11817  return foldedExt;
11818 
11819  if (SDValue foldedExt =
11820  tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11822  return foldedExt;
11823 
11824  // fold (zext (load x)) to multiple smaller zextloads.
11825  // Only on illegal but splittable vectors.
11826  if (SDValue ExtLoad = CombineExtLoad(N))
11827  return ExtLoad;
11828 
11829  // fold (zext (and/or/xor (load x), cst)) ->
11830  // (and/or/xor (zextload x), (zext cst))
11831  // Unless (and (load x) cst) will match as a zextload already and has
11832  // additional users.
11833  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11834  N0.getOpcode() == ISD::XOR) &&
11835  isa<LoadSDNode>(N0.getOperand(0)) &&
11836  N0.getOperand(1).getOpcode() == ISD::Constant &&
11837  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11838  LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11839  EVT MemVT = LN00->getMemoryVT();
11840  if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11841  LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11842  bool DoXform = true;
11843  SmallVector<SDNode*, 4> SetCCs;
11844  if (!N0.hasOneUse()) {
11845  if (N0.getOpcode() == ISD::AND) {
11846  auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11847  EVT LoadResultTy = AndC->getValueType(0);
11848  EVT ExtVT;
11849  if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11850  DoXform = false;
11851  }
11852  }
11853  if (DoXform)
11854  DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11855  ISD::ZERO_EXTEND, SetCCs, TLI);
11856  if (DoXform) {
11857  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11858  LN00->getChain(), LN00->getBasePtr(),
11859  LN00->getMemoryVT(),
11860  LN00->getMemOperand());
11862  SDLoc DL(N);
11863  SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11864  ExtLoad, DAG.getConstant(Mask, DL, VT));
11865  ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11866  bool NoReplaceTruncAnd = !N0.hasOneUse();
11867  bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11868  CombineTo(N, And);
11869  // If N0 has multiple uses, change other uses as well.
11870  if (NoReplaceTruncAnd) {
11871  SDValue TruncAnd =
11872  DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11873  CombineTo(N0.getNode(), TruncAnd);
11874  }
11875  if (NoReplaceTrunc) {
11876  DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11877  } else {
11878  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11879  LN00->getValueType(0), ExtLoad);
11880  CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11881  }
11882  return SDValue(N,0); // Return N so it doesn't get rechecked!
11883  }
11884  }
11885  }
11886 
11887  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11888  // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11889  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11890  return ZExtLoad;
11891 
11892  // Try to simplify (zext (zextload x)).
11893  if (SDValue foldedExt = tryToFoldExtOfExtload(
11894  DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11895  return foldedExt;
11896 
11897  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11898  return V;
11899 
11900  if (N0.getOpcode() == ISD::SETCC) {
11901  // Only do this before legalize for now.
11902  if (!LegalOperations && VT.isVector() &&
11904  EVT N00VT = N0.getOperand(0).getValueType();
11905  if (getSetCCResultType(N00VT) == N0.getValueType())
11906  return SDValue();
11907 
11908  // We know that the # elements of the results is the same as the #
11909  // elements of the compare (and the # elements of the compare result for
11910  // that matter). Check to see that they are the same size. If so, we know
11911  // that the element size of the sext'd result matches the element size of
11912  // the compare operands.
11913  SDLoc DL(N);
11914  if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11915  // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11916  SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11917  N0.getOperand(1), N0.getOperand(2));
11918  return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11919  }
11920 
11921  // If the desired elements are smaller or larger than the source
11922  // elements we can use a matching integer vector type and then
11923  // truncate/any extend followed by zext_in_reg.
11924  EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11925  SDValue VsetCC =
11926  DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11927  N0.getOperand(1), N0.getOperand(2));
11928  return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11929  N0.getValueType());
11930  }
11931 
11932  // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11933  SDLoc DL(N);
11934  EVT N0VT = N0.getValueType();
11935  EVT N00VT = N0.getOperand(0).getValueType();
11936  if (SDValue SCC = SimplifySelectCC(
11937  DL, N0.getOperand(0), N0.getOperand(1),
11938  DAG.getBoolConstant(true, DL, N0VT, N00VT),
11939  DAG.getBoolConstant(false, DL, N0VT, N00VT),
11940  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11941  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11942  }
11943 
11944  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
11945  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11946  isa<ConstantSDNode>(N0.getOperand(1)) &&
11947  N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11948  N0.hasOneUse()) {
11949  SDValue ShAmt = N0.getOperand(1);
11950  if (N0.getOpcode() == ISD::SHL) {
11951  SDValue InnerZExt = N0.getOperand(0);
11952  // If the original shl may be shifting out bits, do not perform this
11953  // transformation.
11954  unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11955  InnerZExt.getOperand(0).getValueSizeInBits();
11956  if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11957  return SDValue();
11958  }
11959 
11960  SDLoc DL(N);
11961 
11962  // Ensure that the shift amount is wide enough for the shifted value.
11963  if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11964  ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11965 
11966  return DAG.getNode(N0.getOpcode(), DL, VT,
11967  DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11968  ShAmt);
11969  }
11970 
11971  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11972  return NewVSel;
11973 
11974  if (SDValue NewCtPop = widenCtPop(N, DAG))
11975  return NewCtPop;
11976 
11977  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11978  return Res;
11979 
11980  return SDValue();
11981 }
11982 
11983 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11984  SDValue N0 = N->getOperand(0);
11985  EVT VT = N->getValueType(0);
11986 
11987  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11988  return Res;
11989 
11990  // fold (aext (aext x)) -> (aext x)
11991  // fold (aext (zext x)) -> (zext x)
11992  // fold (aext (sext x)) -> (sext x)
11993  if (N0.getOpcode() == ISD::ANY_EXTEND ||
11994  N0.getOpcode() == ISD::ZERO_EXTEND ||
11995  N0.getOpcode() == ISD::SIGN_EXTEND)
11996  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11997 
11998  // fold (aext (truncate (load x))) -> (aext (smaller load x))
11999  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
12000  if (N0.getOpcode() == ISD::TRUNCATE) {
12001  if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12002  SDNode *oye = N0.getOperand(0).getNode();
12003  if (NarrowLoad.getNode() != N0.getNode()) {
12004  CombineTo(N0.getNode(), NarrowLoad);
12005  // CombineTo deleted the truncate, if needed, but not what's under it.
12006  AddToWorklist(oye);
12007  }
12008  return SDValue(N, 0); // Return N so it doesn't get rechecked!
12009  }
12010  }
12011 
12012  // fold (aext (truncate x))
12013  if (N0.getOpcode() == ISD::TRUNCATE)
12014  return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12015 
12016  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
12017  // if the trunc is not free.
12018  if (N0.getOpcode() == ISD::AND &&
12019  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12020  N0.getOperand(1).getOpcode() == ISD::Constant &&
12022  N0.getValueType())) {
12023  SDLoc DL(N);
12024  SDValue X = N0.getOperand(0).getOperand(0);
12025  X = DAG.getAnyExtOrTrunc(X, DL, VT);
12027  return DAG.getNode(ISD::AND, DL, VT,
12028  X, DAG.getConstant(Mask, DL, VT));
12029  }
12030 
12031  // fold (aext (load x)) -> (aext (truncate (extload x)))
12032  // None of the supported targets knows how to perform load and any_ext
12033  // on vectors in one instruction, so attempt to fold to zext instead.
12034  if (VT.isVector()) {
12035  // Try to simplify (zext (load x)).
12036  if (SDValue foldedExt =
12037  tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12039  return foldedExt;
12040  } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
12041  ISD::isUNINDEXEDLoad(N0.getNode()) &&
12042  TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12043  bool DoXform = true;
12044  SmallVector<SDNode *, 4> SetCCs;
12045  if (!N0.hasOneUse())
12046  DoXform =
12047  ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
12048  if (DoXform) {
12049  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12050  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12051  LN0->getChain(), LN0->getBasePtr(),
12052  N0.getValueType(), LN0->getMemOperand());
12053  ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
12054  // If the load value is used only by N, replace it via CombineTo N.
12055  bool NoReplaceTrunc = N0.hasOneUse();
12056  CombineTo(N, ExtLoad);
12057  if (NoReplaceTrunc) {
12058  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12059  recursivelyDeleteUnusedNodes(LN0);
12060  } else {
12061  SDValue Trunc =
12062  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
12063  CombineTo(LN0, Trunc, ExtLoad.getValue(1));
12064  }
12065  return SDValue(N, 0); // Return N so it doesn't get rechecked!
12066  }
12067  }
12068 
12069  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
12070  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
12071  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
12072  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
12073  ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
12074  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12075  ISD::LoadExtType ExtType = LN0->getExtensionType();
12076  EVT MemVT = LN0->getMemoryVT();
12077  if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
12078  SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
12079  VT, LN0->getChain(), LN0->getBasePtr(),
12080  MemVT, LN0->getMemOperand());
12081  CombineTo(N, ExtLoad);
12082  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12083  recursivelyDeleteUnusedNodes(LN0);
12084  return SDValue(N, 0); // Return N so it doesn't get rechecked!
12085  }
12086  }
12087 
12088  if (N0.getOpcode() == ISD::SETCC) {
12089  // For vectors:
12090  // aext(setcc) -> vsetcc
12091  // aext(setcc) -> truncate(vsetcc)
12092  // aext(setcc) -> aext(vsetcc)
12093  // Only do this before legalize for now.
12094  if (VT.isVector() && !LegalOperations) {
12095  EVT N00VT = N0.getOperand(0).getValueType();
12096  if (getSetCCResultType(N00VT) == N0.getValueType())
12097  return SDValue();
12098 
12099  // We know that the # elements of the results is the same as the
12100  // # elements of the compare (and the # elements of the compare result
12101  // for that matter). Check to see that they are the same size. If so,
12102  // we know that the element size of the sext'd result matches the
12103  // element size of the compare operands.
12104  if (VT.getSizeInBits() == N00VT.getSizeInBits())
12105  return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
12106  N0.getOperand(1),
12107  cast<CondCodeSDNode>(N0.getOperand(2))->get());
12108 
12109  // If the desired elements are smaller or larger than the source
12110  // elements we can use a matching integer vector type and then
12111  // truncate/any extend
12112  EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12113  SDValue VsetCC =
12114  DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
12115  N0.getOperand(1),
12116  cast<CondCodeSDNode>(N0.getOperand(2))->get());
12117  return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
12118  }
12119 
12120  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
12121  SDLoc DL(N);
12122  if (SDValue SCC = SimplifySelectCC(
12123  DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
12124  DAG.getConstant(0, DL, VT),
12125  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12126  return SCC;
12127  }
12128 
12129  if (SDValue NewCtPop = widenCtPop(N, DAG))
12130  return NewCtPop;
12131 
12132  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12133  return Res;
12134 
12135  return SDValue();
12136 }
12137 
12138 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
12139  unsigned Opcode = N->getOpcode();
12140  SDValue N0 = N->getOperand(0);
12141  SDValue N1 = N->getOperand(1);
12142  EVT AssertVT = cast<VTSDNode>(N1)->getVT();
12143 
12144  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
12145  if (N0.getOpcode() == Opcode &&
12146  AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
12147  return N0;
12148 
12149  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12150  N0.getOperand(0).getOpcode() == Opcode) {
12151  // We have an assert, truncate, assert sandwich. Make one stronger assert
12152  // by asserting on the smallest asserted type to the larger source type.
12153  // This eliminates the later assert:
12154  // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
12155  // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
12156  SDValue BigA = N0.getOperand(0);
12157  EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12158  assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12159  "Asserting zero/sign-extended bits to a type larger than the "
12160  "truncated destination does not provide information");
12161 
12162  SDLoc DL(N);
12163  EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
12164  SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
12165  SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12166  BigA.getOperand(0), MinAssertVTVal);
12167  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12168  }
12169 
12170  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
12171  // than X. Just move the AssertZext in front of the truncate and drop the
12172  // AssertSExt.
12173  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12174  N0.getOperand(0).getOpcode() == ISD::AssertSext &&
12175  Opcode == ISD::AssertZext) {
12176  SDValue BigA = N0.getOperand(0);
12177  EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12178  assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12179  "Asserting zero/sign-extended bits to a type larger than the "
12180  "truncated destination does not provide information");
12181 
12182  if (AssertVT.bitsLT(BigA_AssertVT)) {
12183  SDLoc DL(N);
12184  SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12185  BigA.getOperand(0), N1);
12186  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12187  }
12188  }
12189 
12190  return SDValue();
12191 }
12192 
12193 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
12194  SDLoc DL(N);
12195 
12196  Align AL = cast<AssertAlignSDNode>(N)->getAlign();
12197  SDValue N0 = N->getOperand(0);
12198 
12199  // Fold (assertalign (assertalign x, AL0), AL1) ->
12200  // (assertalign x, max(AL0, AL1))
12201  if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
12202  return DAG.getAssertAlign(DL, N0.getOperand(0),
12203  std::max(AL, AAN->getAlign()));
12204 
12205  // In rare cases, there are trivial arithmetic ops in source operands. Sink
12206  // this assert down to source operands so that those arithmetic ops could be
12207  // exposed to the DAG combining.
12208  switch (N0.getOpcode()) {
12209  default:
12210  break;
12211  case ISD::ADD:
12212  case ISD::SUB: {
12213  unsigned AlignShift = Log2(AL);
12214  SDValue LHS = N0.getOperand(0);
12215  SDValue RHS = N0.getOperand(1);
12216  unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
12217  unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12218  if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
12219  if (LHSAlignShift < AlignShift)
12220  LHS = DAG.getAssertAlign(DL, LHS, AL);
12221  if (RHSAlignShift < AlignShift)
12222  RHS = DAG.getAssertAlign(DL, RHS, AL);
12223  return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
12224  }
12225  break;
12226  }
12227  }
12228 
12229  return SDValue();
12230 }
12231 
12232 /// If the result of a load is shifted/masked/truncated to an effectively
12233 /// narrower type, try to transform the load to a narrower type and/or
12234 /// use an extending load.
12235 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
12236  unsigned Opc = N->getOpcode();
12237 
12239  SDValue N0 = N->getOperand(0);
12240  EVT VT = N->getValueType(0);
12241  EVT ExtVT = VT;
12242 
12243  // This transformation isn't valid for vector loads.
12244  if (VT.isVector())
12245  return SDValue();
12246 
12247  // The ShAmt variable is used to indicate that we've consumed a right
12248  // shift. I.e. we want to narrow the width of the load by skipping to load the
12249  // ShAmt least significant bits.
12250  unsigned ShAmt = 0;
12251  // A special case is when the least significant bits from the load are masked
12252  // away, but using an AND rather than a right shift. HasShiftedOffset is used
12253  // to indicate that the narrowed load should be left-shifted ShAmt bits to get
12254  // the result.
12255  bool HasShiftedOffset = false;
12256  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
12257  // extended to VT.
12258  if (Opc == ISD::SIGN_EXTEND_INREG) {
12259  ExtType = ISD::SEXTLOAD;
12260  ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12261  } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
12262  // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
12263  // value, or it may be shifting a higher subword, half or byte into the
12264  // lowest bits.
12265 
12266  // Only handle shift with constant shift amount, and the shiftee must be a
12267  // load.
12268  auto *LN = dyn_cast<LoadSDNode>(N0);
12269  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12270  if (!N1C || !LN)
12271  return SDValue();
12272  // If the shift amount is larger than the memory type then we're not
12273  // accessing any of the loaded bytes.
12274  ShAmt = N1C->getZExtValue();
12275  uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
12276  if (MemoryWidth <= ShAmt)
12277  return SDValue();
12278  // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
12279  ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
12280  ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12281  // If original load is a SEXTLOAD then we can't simply replace it by a
12282  // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
12283  // followed by a ZEXT, but that is not handled at the moment). Similarly if
12284  // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
12285  if ((LN->getExtensionType() == ISD::SEXTLOAD ||
12286  LN->getExtensionType() == ISD::ZEXTLOAD) &&
12287  LN->getExtensionType() != ExtType)
12288  return SDValue();
12289  } else if (Opc == ISD::AND) {
12290  // An AND with a constant mask is the same as a truncate + zero-extend.
12291  auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
12292  if (!AndC)
12293  return SDValue();
12294 
12295  const APInt &Mask = AndC->getAPIntValue();
12296  unsigned ActiveBits = 0;
12297  if (Mask.isMask()) {
12298  ActiveBits = Mask.countTrailingOnes();
12299  } else if (Mask.isShiftedMask()) {
12300  ShAmt = Mask.countTrailingZeros();
12301  APInt ShiftedMask = Mask.lshr(ShAmt);
12302  ActiveBits = ShiftedMask.countTrailingOnes();
12303  HasShiftedOffset = true;
12304  } else
12305  return SDValue();
12306 
12307  ExtType = ISD::ZEXTLOAD;
12308  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
12309  }
12310 
12311  // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
12312  // a right shift. Here we redo some of those checks, to possibly adjust the
12313  // ExtVT even further based on "a masking AND". We could also end up here for
12314  // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
12315  // need to be done here as well.
12316  if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
12317  SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
12318  // Bail out when the SRL has more than one use. This is done for historical
12319  // (undocumented) reasons. Maybe intent was to guard the AND-masking below
12320  // check below? And maybe it could be non-profitable to do the transform in
12321  // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
12322  // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
12323  if (!SRL.hasOneUse())
12324  return SDValue();
12325 
12326  // Only handle shift with constant shift amount, and the shiftee must be a
12327  // load.
12328  auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
12329  auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
12330  if (!SRL1C || !LN)
12331  return SDValue();
12332 
12333  // If the shift amount is larger than the input type then we're not
12334  // accessing any of the loaded bytes. If the load was a zextload/extload
12335  // then the result of the shift+trunc is zero/undef (handled elsewhere).
12336  ShAmt = SRL1C->getZExtValue();
12337  uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
12338  if (ShAmt >= MemoryWidth)
12339  return SDValue();
12340 
12341  // Because a SRL must be assumed to *need* to zero-extend the high bits
12342  // (as opposed to anyext the high bits), we can't combine the zextload
12343  // lowering of SRL and an sextload.
12344  if (LN->getExtensionType() == ISD::SEXTLOAD)
12345  return SDValue();
12346 
12347  // Avoid reading outside the memory accessed by the original load (could
12348  // happened if we only adjust the load base pointer by ShAmt). Instead we
12349  // try to narrow the load even further. The typical scenario here is:
12350  // (i64 (truncate (i96 (srl (load x), 64)))) ->
12351  // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
12352  if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
12353  // Don't replace sextload by zextload.
12354  if (ExtType == ISD::SEXTLOAD)
12355  return SDValue();
12356  // Narrow the load.
12357  ExtType = ISD::ZEXTLOAD;
12358  ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12359  }
12360 
12361  // If the SRL is only used by a masking AND, we may be able to adjust
12362  // the ExtVT to make the AND redundant.
12363  SDNode *Mask = *(SRL->use_begin());
12364  if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
12365  isa<ConstantSDNode>(Mask->getOperand(1))) {
12366  const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
12367  if (ShiftMask.isMask()) {
12368  EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
12369  ShiftMask.countTrailingOnes());
12370  // If the mask is smaller, recompute the type.
12371  if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
12372  TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
12373  ExtVT = MaskedVT;
12374  }
12375  }
12376 
12377  N0 = SRL.getOperand(0);
12378  }
12379 
12380  // If the load is shifted left (and the result isn't shifted back right), we
12381  // can fold a truncate through the shift. The typical scenario is that N
12382  // points at a TRUNCATE here so the attempted fold is:
12383  // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
12384  // ShLeftAmt will indicate how much a narrowed load should be shifted left.
12385  unsigned ShLeftAmt = 0;
12386  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12387  ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
12388  if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
12389  ShLeftAmt = N01->getZExtValue();
12390  N0 = N0.getOperand(0);
12391  }
12392  }
12393 
12394  // If we haven't found a load, we can't narrow it.
12395  if (!isa<LoadSDNode>(N0))
12396  return SDValue();
12397 
12398  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12399  // Reducing the width of a volatile load is illegal. For atomics, we may be
12400  // able to reduce the width provided we never widen again. (see D66309)
12401  if (!LN0->isSimple() ||
12402  !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
12403  return SDValue();
12404 
12405  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
12406  unsigned LVTStoreBits =
12408  unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
12409  return LVTStoreBits - EVTStoreBits - ShAmt;
12410  };
12411 
12412  // We need to adjust the pointer to the load by ShAmt bits in order to load
12413  // the correct bytes.
12414  unsigned PtrAdjustmentInBits =
12415  DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
12416 
12417  uint64_t PtrOff = PtrAdjustmentInBits / 8;
12418  Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
12419  SDLoc DL(LN0);
12420  // The original load itself didn't wrap, so an offset within it doesn't.
12421  SDNodeFlags Flags;
12422  Flags.setNoUnsignedWrap(true);
12423  SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
12424  TypeSize::Fixed(PtrOff), DL, Flags);
12425  AddToWorklist(NewPtr.getNode());
12426 
12427  SDValue Load;
12428  if (ExtType == ISD::NON_EXTLOAD)
12429  Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
12430  LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12431  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12432  else
12433  Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
12434  LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
12435  NewAlign, LN0->getMemOperand()->getFlags(),
12436  LN0->getAAInfo());
12437 
12438  // Replace the old load's chain with the new load's chain.
12439  WorklistRemover DeadNodes(*this);
12440  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12441 
12442  // Shift the result left, if we've swallowed a left shift.
12443  SDValue Result = Load;
12444  if (ShLeftAmt != 0) {
12445  EVT ShImmTy = getShiftAmountTy(Result.getValueType());
12446  if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
12447  ShImmTy = VT;
12448  // If the shift amount is as large as the result size (but, presumably,
12449  // no larger than the source) then the useful bits of the result are
12450  // zero; we can't simply return the shortened shift, because the result
12451  // of that operation is undefined.
12452  if (ShLeftAmt >= VT.getScalarSizeInBits())
12453  Result = DAG.getConstant(0, DL, VT);
12454  else
12455  Result = DAG.getNode(ISD::SHL, DL, VT,
12456  Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
12457  }
12458 
12459  if (HasShiftedOffset) {
12460  // We're using a shifted mask, so the load now has an offset. This means
12461  // that data has been loaded into the lower bytes than it would have been
12462  // before, so we need to shl the loaded data into the correct position in the
12463  // register.
12464  SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12465  Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12466  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12467  }
12468 
12469  // Return the new loaded value.
12470  return Result;
12471 }
12472 
12473 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12474  SDValue N0 = N->getOperand(0);
12475  SDValue N1 = N->getOperand(1);
12476  EVT VT = N->getValueType(0);
12477  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12478  unsigned VTBits = VT.getScalarSizeInBits();
12479  unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12480 
12481  // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
12482  if (N0.isUndef())
12483  return DAG.getConstant(0, SDLoc(N), VT);
12484 
12485  // fold (sext_in_reg c1) -> c1
12487  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12488 
12489  // If the input is already sign extended, just drop the extension.
12490  if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
12491  return N0;
12492 
12493  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12494  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12495  ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12496  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12497  N1);
12498 
12499  // fold (sext_in_reg (sext x)) -> (sext x)
12500  // fold (sext_in_reg (aext x)) -> (sext x)
12501  // if x is small enough or if we know that x has more than 1 sign bit and the
12502  // sign_extend_inreg is extending from one of them.
12503  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12504  SDValue N00 = N0.getOperand(0);
12505  unsigned N00Bits = N00.getScalarValueSizeInBits();
12506  if ((N00Bits <= ExtVTBits ||
12507  DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
12508  (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12509  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12510  }
12511 
12512  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12513  // if x is small enough or if we know that x has more than 1 sign bit and the
12514  // sign_extend_inreg is extending from one of them.
12518  SDValue N00 = N0.getOperand(0);
12519  unsigned N00Bits = N00.getScalarValueSizeInBits();
12520  unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12521  unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12522  bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12523  APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12524  if ((N00Bits == ExtVTBits ||
12525  (!IsZext && (N00Bits < ExtVTBits ||
12526  DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
12527  (!LegalOperations ||
12529  return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12530  }
12531 
12532  // fold (sext_in_reg (zext x)) -> (sext x)
12533  // iff we are extending the source sign bit.
12534  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12535  SDValue N00 = N0.getOperand(0);
12536  if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12537  (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12538  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12539  }
12540 
12541  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12542  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12543  return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12544 
12545  // fold operands of sext_in_reg based on knowledge that the top bits are not
12546  // demanded.
12547  if (SimplifyDemandedBits(SDValue(N, 0)))
12548  return SDValue(N, 0);
12549 
12550  // fold (sext_in_reg (load x)) -> (smaller sextload x)
12551  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12552  if (SDValue NarrowLoad = reduceLoadWidth(N))
12553  return NarrowLoad;
12554 
12555  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12556  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12557  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12558  if (N0.getOpcode() == ISD::SRL) {
12559  if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12560  if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12561  // We can turn this into an SRA iff the input to the SRL is already sign
12562  // extended enough.
12563  unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12564  if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12565  return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12566  N0.getOperand(1));
12567  }
12568  }
12569 
12570  // fold (sext_inreg (extload x)) -> (sextload x)
12571  // If sextload is not supported by target, we can only do the combine when
12572  // load has one use. Doing otherwise can block folding the extload with other
12573  // extends that the target does support.
12574  if (ISD::isEXTLoad(N0.getNode()) &&
12575  ISD::isUNINDEXEDLoad(N0.getNode()) &&
12576  ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12577  ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12578  N0.hasOneUse()) ||
12579  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12580  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12581  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12582  LN0->getChain(),
12583  LN0->getBasePtr(), ExtVT,
12584  LN0->getMemOperand());
12585  CombineTo(N, ExtLoad);
12586  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12587  AddToWorklist(ExtLoad.getNode());
12588  return SDValue(N, 0); // Return N so it doesn't get rechecked!
12589  }
12590 
12591  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12592  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12593  N0.hasOneUse() &&
12594  ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12595  ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12596  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12597  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12598  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12599  LN0->getChain(),
12600  LN0->getBasePtr(), ExtVT,
12601  LN0->getMemOperand());
12602  CombineTo(N, ExtLoad);
12603  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12604  return SDValue(N, 0); // Return N so it doesn't get rechecked!
12605  }
12606 
12607  // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12608  // ignore it if the masked load is already sign extended
12609  if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12610  if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12611  Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12612  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12613  SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12614  VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12615  Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12616  Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12617  CombineTo(N, ExtMaskedLoad);
12618  CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12619  return SDValue(N, 0); // Return N so it doesn't get rechecked!
12620  }
12621  }
12622 
12623  // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12624  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12625  if (SDValue(GN0, 0).hasOneUse() &&
12626  ExtVT == GN0->getMemoryVT() &&
12627  TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12628  SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
12629  GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
12630 
12631  SDValue ExtLoad = DAG.getMaskedGather(
12632  DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12633  GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12634 
12635  CombineTo(N, ExtLoad);
12636  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12637  AddToWorklist(ExtLoad.getNode());
12638  return SDValue(N, 0); // Return N so it doesn't get rechecked!
12639  }
12640  }
12641 
12642  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12643  if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12644  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12645  N0.getOperand(1), false))
12646  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12647  }
12648 
12649  return SDValue();
12650 }
12651 
12652 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12653  SDValue N0 = N->getOperand(0);
12654  EVT VT = N->getValueType(0);
12655 
12656  // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12657  if (N0.isUndef())
12658  return DAG.getConstant(0, SDLoc(N), VT);
12659 
12660  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12661  return Res;
12662 
12664  return SDValue(N, 0);
12665 
12666  return SDValue();
12667 }
12668 
12669 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12670  SDValue N0 = N->getOperand(0);
12671  EVT VT = N->getValueType(0);
12672  EVT SrcVT = N0.getValueType();
12673  bool isLE = DAG.getDataLayout().isLittleEndian();
12674 
12675  // noop truncate
12676  if (SrcVT == VT)
12677  return N0;
12678 
12679  // fold (truncate (truncate x)) -> (truncate x)
12680  if (N0.getOpcode() == ISD::TRUNCATE)
12681  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12682 
12683  // fold (truncate c1) -> c1
12685  SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12686  if (C.getNode() != N)
12687  return C;
12688  }
12689 
12690  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12691  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12692  N0.getOpcode() == ISD::SIGN_EXTEND ||
12693  N0.getOpcode() == ISD::ANY_EXTEND) {
12694  // if the source is smaller than the dest, we still need an extend.
12695  if (N0.getOperand(0).getValueType().bitsLT(VT))
12696  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12697  // if the source is larger than the dest, than we just need the truncate.
12698  if (N0.getOperand(0).getValueType().bitsGT(VT))
12699  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12700  // if the source and dest are the same type, we can drop both the extend
12701  // and the truncate.
12702  return N0.getOperand(0);
12703  }
12704 
12705  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12706  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12707  return SDValue();
12708 
12709  // Fold extract-and-trunc into a narrow extract. For example:
12710  // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12711  // i32 y = TRUNCATE(i64 x)
12712  // -- becomes --
12713  // v16i8 b = BITCAST (v2i64 val)
12714  // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12715  //
12716  // Note: We only run this optimization after type legalization (which often
12717  // creates this pattern) and before operation legalization after which
12718  // we need to be more careful about the vector instructions that we generate.
12719  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12720  LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12721  EVT VecTy = N0.getOperand(0).getValueType();
12722  EVT ExTy = N0.getValueType();
12723  EVT TrTy = N->getValueType(0);
12724 
12725  auto EltCnt = VecTy.getVectorElementCount();
12726  unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12727  auto NewEltCnt = EltCnt * SizeRatio;
12728 
12729  EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12730  assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12731 
12732  SDValue EltNo = N0->getOperand(1);
12733  if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12734  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12735  int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12736 
12737  SDLoc DL(N);
12738  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12739  DAG.getBitcast(NVT, N0.getOperand(0)),
12740  DAG.getVectorIdxConstant(Index, DL));
12741  }
12742  }
12743 
12744  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12745  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12746  if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12747  TLI.isTruncateFree(SrcVT, VT)) {
12748  SDLoc SL(N0);
12749  SDValue Cond = N0.getOperand(0);
12750  SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12751  SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12752  return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12753  }
12754  }
12755 
12756  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12757  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12758  (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12759  TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12760  SDValue Amt = N0.getOperand(1);
12761  KnownBits Known = DAG.computeKnownBits(Amt);
12762  unsigned Size = VT.getScalarSizeInBits();
12763  if (Known.countMaxActiveBits() <= Log2_32(Size)) {
12764  SDLoc SL(N);
12765  EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12766 
12767  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12768  if (AmtVT != Amt.getValueType()) {
12769  Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12770  AddToWorklist(Amt.getNode());
12771  }
12772  return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12773  }
12774  }
12775 
12776  if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12777  return V;
12778 
12779  // Attempt to pre-truncate BUILD_VECTOR sources.
12780  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12781  TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12782  // Avoid creating illegal types if running after type legalizer.
12783  (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12784  SDLoc DL(N);
12785  EVT SVT = VT.getScalarType();
12786  SmallVector<SDValue, 8> TruncOps;
12787  for (const SDValue &Op : N0->op_values()) {
12788  SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12789  TruncOps.push_back(TruncOp);
12790  }
12791  return DAG.getBuildVector(VT, DL, TruncOps);
12792  }
12793 
12794  // Fold a series of buildvector, bitcast, and truncate if possible.
12795  // For example fold
12796  // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12797  // (2xi32 (buildvector x, y)).
12798  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12799  N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12800  N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12801  N0.getOperand(0).hasOneUse()) {
12802  SDValue BuildVect = N0.getOperand(0);
12803  EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12804  EVT TruncVecEltTy = VT.getVectorElementType();
12805 
12806  // Check that the element types match.
12807  if (BuildVectEltTy == TruncVecEltTy) {
12808  // Now we only need to compute the offset of the truncated elements.
12809  unsigned BuildVecNumElts = BuildVect.getNumOperands();
12810  unsigned TruncVecNumElts = VT.getVectorNumElements();
12811  unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12812 
12813  assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12814  "Invalid number of elements");
12815 
12817  for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12818  Opnds.push_back(BuildVect.getOperand(i));
12819 
12820  return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12821  }
12822  }
12823 
12824  // See if we can simplify the input to this truncate through knowledge that
12825  // only the low bits are being used.
12826  // For example "trunc (or (shl x, 8), y)" // -> trunc y
12827  // Currently we only perform this optimization on scalars because vectors
12828  // may have different active low bits.
12829  if (!VT.isVector()) {
12830  APInt Mask =
12832  if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12833  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12834  }
12835 
12836  // fold (truncate (load x)) -> (smaller load x)
12837  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12838  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12839  if (SDValue Reduced = reduceLoadWidth(N))
12840  return Reduced;
12841 
12842  // Handle the case where the load remains an extending load even
12843  // after truncation.
12844  if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12845  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12846  if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12847  SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12848  VT, LN0->getChain(), LN0->getBasePtr(),
12849  LN0->getMemoryVT(),
12850  LN0->getMemOperand());
12851  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12852  return NewLoad;
12853  }
12854  }
12855  }
12856 
12857  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
12858  // where ... are all 'undef'.
12859  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12860  SmallVector<EVT, 8> VTs;
12861  SDValue V;
12862  unsigned Idx = 0;
12863  unsigned NumDefs = 0;
12864 
12865  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12866  SDValue X = N0.getOperand(i);
12867  if (!X.isUndef()) {
12868  V = X;
12869  Idx = i;
12870  NumDefs++;
12871  }
12872  // Stop if more than one members are non-undef.
12873  if (NumDefs > 1)
12874  break;
12875 
12877  VT.getVectorElementType(),
12878  X.getValueType().getVectorElementCount()));
12879  }
12880 
12881  if (NumDefs == 0)
12882  return DAG.getUNDEF(VT);
12883 
12884  if (NumDefs == 1) {
12885  assert(V.getNode() && "The single defined operand is empty!");
12887  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12888  if (i != Idx) {
12889  Opnds.push_back(DAG.getUNDEF(VTs[i]));
12890  continue;
12891  }
12892  SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12893  AddToWorklist(NV.getNode());
12894  Opnds.push_back(NV);
12895  }
12896  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12897  }
12898  }
12899 
12900  // Fold truncate of a bitcast of a vector to an extract of the low vector
12901  // element.
12902  //
12903  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12904  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12905  SDValue VecSrc = N0.getOperand(0);
12906  EVT VecSrcVT = VecSrc.getValueType();
12907  if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12908  (!LegalOperations ||
12909  TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12910  SDLoc SL(N);
12911 
12912  unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12913  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12914  DAG.getVectorIdxConstant(Idx, SL));
12915  }
12916  }
12917 
12918  // Simplify the operands using demanded-bits information.
12919  if (SimplifyDemandedBits(SDValue(N, 0)))
12920  return SDValue(N, 0);
12921 
12922  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12923  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12924  // When the adde's carry is not used.
12925  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12926  N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12927  // We only do for addcarry before legalize operation
12928  ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12929  TLI.isOperationLegal(N0.getOpcode(), VT))) {
12930  SDLoc SL(N);
12931  auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12932  auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12933  auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12934  return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12935  }
12936 
12937  // fold (truncate (extract_subvector(ext x))) ->
12938  // (extract_subvector x)
12939  // TODO: This can be generalized to cover cases where the truncate and extract
12940  // do not fully cancel each other out.
12941  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12942  SDValue N00 = N0.getOperand(0);
12943  if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12944  N00.getOpcode() == ISD::ZERO_EXTEND ||
12945  N00.getOpcode() == ISD::ANY_EXTEND) {
12946  if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12947  VT.getVectorElementType())
12948  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12949  N00.getOperand(0), N0.getOperand(1));
12950  }
12951  }
12952 
12953  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12954  return NewVSel;
12955 
12956  // Narrow a suitable binary operation with a non-opaque constant operand by
12957  // moving it ahead of the truncate. This is limited to pre-legalization
12958  // because targets may prefer a wider type during later combines and invert
12959  // this transform.
12960  switch (N0.getOpcode()) {
12961  case ISD::ADD:
12962  case ISD::SUB:
12963  case ISD::MUL:
12964  case ISD::AND:
12965  case ISD::OR:
12966  case ISD::XOR:
12967  if (!LegalOperations && N0.hasOneUse() &&
12968  (isConstantOrConstantVector(N0.getOperand(0), true) ||
12969  isConstantOrConstantVector(N0.getOperand(1), true))) {
12970  // TODO: We already restricted this to pre-legalization, but for vectors
12971  // we are extra cautious to not create an unsupported operation.
12972  // Target-specific changes are likely needed to avoid regressions here.
12973  if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12974  SDLoc DL(N);
12975  SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12976  SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12977  return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12978  }
12979  }
12980  break;
12981  case ISD::USUBSAT:
12982  // Truncate the USUBSAT only if LHS is a known zero-extension, its not
12983  // enough to know that the upper bits are zero we must ensure that we don't
12984  // introduce an extra truncate.
12985  if (!LegalOperations && N0.hasOneUse() &&
12986  N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12988  VT.getScalarSizeInBits() &&
12989  hasOperation(N0.getOpcode(), VT)) {
12990  return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12991  DAG, SDLoc(N));
12992  }
12993  break;
12994  }
12995 
12996  return SDValue();
12997 }
12998 
12999 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
13000  SDValue Elt = N->getOperand(i);
13001  if (Elt.getOpcode() != ISD::MERGE_VALUES)
13002  return Elt.getNode();
13003  return Elt.getOperand(Elt.getResNo()).getNode();
13004 }
13005 
13006 /// build_pair (load, load) -> load
13007 /// if load locations are consecutive.
13008 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
13009  assert(N->getOpcode() == ISD::BUILD_PAIR);
13010 
13011  auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
13012  auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
13013 
13014  // A BUILD_PAIR is always having the least significant part in elt 0 and the
13015  // most significant part in elt 1. So when combining into one large load, we
13016  // need to consider the endianness.
13017  if (DAG.getDataLayout().isBigEndian())
13018  std::swap(LD1, LD2);
13019 
13020  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
13021  !LD1->hasOneUse() || !LD2->hasOneUse() ||
13022  LD1->getAddressSpace() != LD2->getAddressSpace())
13023  return SDValue();
13024 
13025  bool LD1Fast = false;
13026  EVT LD1VT = LD1->getValueType(0);
13027  unsigned LD1Bytes = LD1VT.getStoreSize();
13028  if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
13029  DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
13030  TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
13031  *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
13032  return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
13033  LD1->getPointerInfo(), LD1->getAlign());
13034 
13035  return SDValue();
13036 }
13037 
13038 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
13039  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
13040  // and Lo parts; on big-endian machines it doesn't.
13041  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
13042 }
13043 
13045  const TargetLowering &TLI) {
13046  // If this is not a bitcast to an FP type or if the target doesn't have
13047  // IEEE754-compliant FP logic, we're done.
13048  EVT VT = N->getValueType(0);
13049  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
13050  return SDValue();
13051 
13052  // TODO: Handle cases where the integer constant is a different scalar
13053  // bitwidth to the FP.
13054  SDValue N0 = N->getOperand(0);
13055  EVT SourceVT = N0.getValueType();
13056  if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
13057  return SDValue();
13058 
13059  unsigned FPOpcode;
13060  APInt SignMask;
13061  switch (N0.getOpcode()) {
13062  case ISD::AND:
13063  FPOpcode = ISD::FABS;
13064  SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
13065  break;
13066  case ISD::XOR:
13067  FPOpcode = ISD::FNEG;
13068  SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13069  break;
13070  case ISD::OR:
13071  FPOpcode = ISD::FABS;
13072  SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13073  break;
13074  default:
13075  return SDValue();
13076  }
13077 
13078  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
13079  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
13080  // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
13081  // fneg (fabs X)
13082  SDValue LogicOp0 = N0.getOperand(0);
13083  ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
13084  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
13085  LogicOp0.getOpcode() == ISD::BITCAST &&
13086  LogicOp0.getOperand(0).getValueType() == VT) {
13087  SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
13088  NumFPLogicOpsConv++;
13089  if (N0.getOpcode() == ISD::OR)
13090  return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
13091  return FPOp;
13092  }
13093 
13094  return SDValue();
13095 }
13096 
13097 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
13098  SDValue N0 = N->getOperand(0);
13099  EVT VT = N->getValueType(0);
13100 
13101  if (N0.isUndef())
13102  return DAG.getUNDEF(VT);
13103 
13104  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
13105  // Only do this before legalize types, unless both types are integer and the
13106  // scalar type is legal. Only do this before legalize ops, since the target
13107  // maybe depending on the bitcast.
13108  // First check to see if this is all constant.
13109  // TODO: Support FP bitcasts after legalize types.
13110  if (VT.isVector() &&
13111  (!LegalTypes ||
13112  (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
13113  TLI.isTypeLegal(VT.getVectorElementType()))) &&
13114  N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
13115  cast<BuildVectorSDNode>(N0)->isConstant())
13116  return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
13117  VT.getVectorElementType());
13118 
13119  // If the input is a constant, let getNode fold it.
13120  if (isIntOrFPConstant(N0)) {
13121  // If we can't allow illegal operations, we need to check that this is just
13122  // a fp -> int or int -> conversion and that the resulting operation will
13123  // be legal.
13124  if (!LegalOperations ||
13125  (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
13126  TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
13127  (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13128  TLI.isOperationLegal(ISD::Constant, VT))) {
13129  SDValue C = DAG.getBitcast(VT, N0);
13130  if (C.getNode() != N)
13131  return C;
13132  }
13133  }
13134 
13135  // (conv (conv x, t1), t2) -> (conv x, t2)
13136  if (N0.getOpcode() == ISD::BITCAST)
13137  return DAG.getBitcast(VT, N0.getOperand(0));
13138 
13139  // fold (conv (load x)) -> (load (conv*)x)
13140  // If the resultant load doesn't need a higher alignment than the original!
13141  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13142  // Do not remove the cast if the types differ in endian layout.
13144  TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13145  // If the load is volatile, we only want to change the load type if the
13146  // resulting load is legal. Otherwise we might increase the number of
13147  // memory accesses. We don't care if the original type was legal or not
13148  // as we assume software couldn't rely on the number of accesses of an
13149  // illegal type.
13150  ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13151  TLI.isOperationLegal(ISD::LOAD, VT))) {
13152  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13153 
13154  if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13155  *LN0->getMemOperand())) {
13156  SDValue Load =
13157  DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13158  LN0->getPointerInfo(), LN0->getAlign(),
13159  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13160  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13161  return Load;
13162  }
13163  }
13164 
13165  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13166  return V;
13167 
13168  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13169  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13170  //
13171  // For ppc_fp128:
13172  // fold (bitcast (fneg x)) ->
13173  // flipbit = signbit
13174  // (xor (bitcast x) (build_pair flipbit, flipbit))
13175  //
13176  // fold (bitcast (fabs x)) ->
13177  // flipbit = (and (extract_element (bitcast x), 0), signbit)
13178  // (xor (bitcast x) (build_pair flipbit, flipbit))
13179  // This often reduces constant pool loads.
13180  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13181  (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13182  N0.getNode()->hasOneUse() && VT.isInteger() &&
13183  !VT.isVector() && !N0.getValueType().isVector()) {
13184  SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13185  AddToWorklist(NewConv.getNode());
13186 
13187  SDLoc DL(N);
13188  if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13189  assert(VT.getSizeInBits() == 128);
13190  SDValue SignBit = DAG.getConstant(
13192  SDValue FlipBit;
13193  if (N0.getOpcode() == ISD::FNEG) {
13194  FlipBit = SignBit;
13195  AddToWorklist(FlipBit.getNode());
13196  } else {
13197  assert(N0.getOpcode() == ISD::FABS);
13198  SDValue Hi =
13199  DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
13201  SDLoc(NewConv)));
13202  AddToWorklist(Hi.getNode());
13203  FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13204  AddToWorklist(FlipBit.getNode());
13205  }
13206  SDValue FlipBits =
13207  DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13208  AddToWorklist(FlipBits.getNode());
13209  return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13210  }
13211  APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13212  if (N0.getOpcode() == ISD::FNEG)
13213  return DAG.getNode(ISD::XOR, DL, VT,
13214  NewConv, DAG.getConstant(SignBit, DL, VT));
13215  assert(N0.getOpcode() == ISD::FABS);
13216  return DAG.getNode(ISD::AND, DL, VT,
13217  NewConv, DAG.getConstant(~SignBit, DL, VT));
13218  }
13219 
13220  // fold (bitconvert (fcopysign cst, x)) ->
13221  // (or (and (bitconvert x), sign), (and cst, (not sign)))
13222  // Note that we don't handle (copysign x, cst) because this can always be
13223  // folded to an fneg or fabs.
13224  //
13225  // For ppc_fp128:
13226  // fold (bitcast (fcopysign cst, x)) ->
13227  // flipbit = (and (extract_element
13228  // (xor (bitcast cst), (bitcast x)), 0),
13229  // signbit)
13230  // (xor (bitcast cst) (build_pair flipbit, flipbit))
13231  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
13232  isa<ConstantFPSDNode>(N0.getOperand(0)) &&
13233  VT.isInteger() && !VT.isVector()) {
13234  unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
13235  EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
13236  if (isTypeLegal(IntXVT)) {
13237  SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13238  AddToWorklist(X.getNode());
13239 
13240  // If X has a different width than the result/lhs, sext it or truncate it.
13241  unsigned VTWidth = VT.getSizeInBits();
13242  if (OrigXWidth < VTWidth) {
13243  X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13244  AddToWorklist(X.getNode());
13245  } else if (OrigXWidth > VTWidth) {
13246  // To get the sign bit in the right place, we have to shift it right
13247  // before truncating.
13248  SDLoc DL(X);
13249  X = DAG.getNode(ISD::SRL, DL,
13250  X.getValueType(), X,
13251  DAG.getConstant(OrigXWidth-VTWidth, DL,
13252  X.getValueType()));
13253  AddToWorklist(X.getNode());
13254  X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13255  AddToWorklist(X.getNode());
13256  }
13257 
13258  if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13259  APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13260  SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13261  AddToWorklist(Cst.getNode());
13262  SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13263  AddToWorklist(X.getNode());
13264  SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13265  AddToWorklist(XorResult.getNode());
13266  SDValue XorResult64 = DAG.getNode(
13267  ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
13269  SDLoc(XorResult)));
13270  AddToWorklist(XorResult64.getNode());
13271  SDValue FlipBit =
13272  DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
13273  DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13274  AddToWorklist(FlipBit.getNode());
13275  SDValue FlipBits =
13276  DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13277  AddToWorklist(FlipBits.getNode());
13278  return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13279  }
13280  APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13281  X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13282  X, DAG.getConstant(SignBit, SDLoc(X), VT));
13283  AddToWorklist(X.getNode());
13284 
13285  SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13286  Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13287  Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13288  AddToWorklist(Cst.getNode());
13289 
13290  return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13291  }
13292  }
13293 
13294  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13295  if (N0.getOpcode() == ISD::BUILD_PAIR)
13296  if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
13297  return CombineLD;
13298 
13299  // Remove double bitcasts from shuffles - this is often a legacy of
13300  // XformToShuffleWithZero being used to combine bitmaskings (of
13301  // float vectors bitcast to integer vectors) into shuffles.
13302  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13303  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13304  N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
13307  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
13308 
13309  // If operands are a bitcast, peek through if it casts the original VT.
13310  // If operands are a constant, just bitcast back to original VT.
13311  auto PeekThroughBitcast = [&](SDValue Op) {
13312  if (Op.getOpcode() == ISD::BITCAST &&
13313  Op.getOperand(0).getValueType() == VT)
13314  return SDValue(Op.getOperand(0));
13315  if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
13317  return DAG.getBitcast(VT, Op);
13318  return SDValue();
13319  };
13320 
13321  // FIXME: If either input vector is bitcast, try to convert the shuffle to
13322  // the result type of this bitcast. This would eliminate at least one
13323  // bitcast. See the transform in InstCombine.
13324  SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
13325  SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
13326  if (!(SV0 && SV1))
13327  return SDValue();
13328 
13329  int MaskScale =
13331  SmallVector<int, 8> NewMask;
13332  for (int M : SVN->getMask())
13333  for (int i = 0; i != MaskScale; ++i)
13334  NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
13335 
13336  SDValue LegalShuffle =
13337  TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13338  if (LegalShuffle)
13339  return LegalShuffle;
13340  }
13341 
13342  return SDValue();
13343 }
13344 
13345 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13346  EVT VT = N->getValueType(0);
13347  return CombineConsecutiveLoads(N, VT);
13348 }
13349 
13350 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13351  SDValue N0 = N->getOperand(0);
13352 
13353  if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13354  return N0;
13355 
13356  return SDValue();
13357 }
13358 
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
/// Returns the bitcast constant-folded into a new BUILD_VECTOR of DstEltVT
/// elements, or an empty SDValue when the raw bits cannot be extracted.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  // Element widths determine whether this is a 1:1 conversion or a
  // grow/shrink of the element count.
  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element. This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    // NOTE(review): the declaration of the `Ops` vector (a
    // SmallVector<SDValue>) appears to have been lost from this excerpt —
    // confirm against upstream before editing.
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated. Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    // NOTE(review): the element-count argument to getVectorVT is missing
    // from this excerpt — confirm against upstream.
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements. To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector. If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());

  // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
  // BuildVectorSDNode?
  auto *BVN = cast<BuildVectorSDNode>(BV);

  // Extract the constant raw bit data, repacked at the destination element
  // width; bails out (empty SDValue) if any operand is not a constant.
  BitVector UndefElements;
  SmallVector<APInt> RawBits;
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
    return SDValue();

  // Rebuild the vector from the raw bits, preserving undef lanes.
  SDLoc DL(BV);
  // NOTE(review): the declaration of the `Ops` vector appears to have been
  // lost from this excerpt — confirm against upstream.
  for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
    if (UndefElements[I])
      Ops.push_back(DAG.getUNDEF(DstEltVT));
    else
      Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
  }

  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
  return DAG.getBuildVector(VT, DL, Ops);
}
13435 
// Returns true if floating point contraction is allowed on the FMUL-SDValue
// `N`: either fusion is globally enabled (-fp-contract=fast or
// -enable-unsafe-fp-math), or the node itself carries the 'contract'
// fast-math flag.
// NOTE(review): the function's signature line is not visible in this
// excerpt — confirm against upstream before editing.
  assert(N.getOpcode() == ISD::FMUL);

  return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
         N->getFlags().hasAllowContract();
}
13444 
13445 // Returns true if `N` can assume no infinities involved in its computation.
13446 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13447  return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
13448 }
13449 
/// Try to perform FMA combining on a given FADD node.
/// Folds (fadd (fmul x, y), z) and related fpext/fma-nested patterns into
/// fused multiply-add nodes (FMA or FMAD) when contraction is permitted by
/// the target, the global FP options, or the node's fast-math flags.
/// Returns the fused node, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));

  // Floating-point multiply-add without intermediate rounding.
  // NOTE(review): upstream also conjoins TLI.isFMAFasterThanFMulAndFAdd
  // here; that line appears to be missing from this excerpt — confirm.
  bool HasFMA =
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  bool CanReassociate =
      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
    return SDValue();

  // Some targets prefer to form FMAs later, in the machine combiner.
  if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Recognizes an already-fused multiply-add node.
  auto isFusedOp = [&](SDValue N) {
    unsigned Opcode = N.getOpcode();
    return Opcode == ISD::FMA || Opcode == ISD::FMAD;
  };

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || N->getFlags().hasAllowContract();
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
                       N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
                       N1.getOperand(1), N0);
  }

  // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
  // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
  // This requires reassociation because it changes the order of operations.
  SDValue FMA, E;
  if (CanReassociate && isFusedOp(N0) &&
      N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
      N0.getOperand(2).hasOneUse()) {
    FMA = N0;
    E = N1;
  } else if (CanReassociate && isFusedOp(N1) &&
             N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
             N1.getOperand(2).hasOneUse()) {
    FMA = N1;
    E = N0;
  }
  if (FMA && E) {
    SDValue A = FMA.getOperand(0);
    SDValue B = FMA.getOperand(1);
    SDValue C = FMA.getOperand(2).getOperand(0);
    SDValue D = FMA.getOperand(2).getOperand(1);
    SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
    return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
                         N1);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
                         N0);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
                                    SDValue Z) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z));
    };
    if (isFusedOp(N0)) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
                                    SDValue Z) {
      return DAG.getNode(
          PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
          DAG.getNode(PreferredFusedOpcode, SL, VT,
                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isFusedOp(N00)) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (isFusedOp(N1)) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (isFusedOp(N10)) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0);
        }
      }
    }
  }

  // No applicable fold.
  return SDValue();
}
13663 
/// Try to perform FMA combining on a given FSUB node.
/// Folds (fsub (fmul x, y), z), (fsub x, (fmul y, z)), and related
/// fneg/fpext/fma-nested patterns into fused multiply-add nodes (FMA or
/// FMAD) when contraction is permitted. Returns the fused node, or an
/// empty SDValue if no fold applies.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));

  // Floating-point multiply-add without intermediate rounding.
  // NOTE(review): upstream also conjoins TLI.isFMAFasterThanFMulAndFAdd
  // here; that line appears to be missing from this excerpt — confirm.
  bool HasFMA =
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
    return SDValue();

  // Some targets prefer to form FMAs later, in the machine combiner.
  if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || N->getFlags().hasAllowContract();
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
    if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
                         XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
    }
    return SDValue();
  };

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
    if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
                         YZ.getOperand(1), X);
    }
    return SDValue();
  };

  // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
      (N0.getNode()->use_size() > N1.getNode()->use_size())) {
    // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
    if (SDValue V = tryToFoldXSubYZ(N0, N1))
      return V;
    // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
    if (SDValue V = tryToFoldXYSubZ(N0, N1))
      return V;
  } else {
    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (SDValue V = tryToFoldXYSubZ(N0, N1))
      return V;
    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    if (SDValue V = tryToFoldXSubYZ(N0, N1))
      return V;
  }

  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1));
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N10.getValueType())) {
      return DAG.getNode(
          PreferredFusedOpcode, SL, VT,
          DAG.getNode(ISD::FNEG, SL, VT,
                      DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
    }
  }

  // fold (fsub (fpext (fneg (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N00.getValueType())) {
        return DAG.getNode(
            ISD::FNEG, SL, VT,
            DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
                        N1));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x)), (fpext y), z)
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N000.getValueType())) {
        return DAG.getNode(
            ISD::FNEG, SL, VT,
            DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
                        N1));
      }
    }
  }

  // Reassociation is allowed globally (-enable-unsafe-fp-math) or per-node
  // via the 'reassoc' fast-math flag.
  auto isReassociable = [Options](SDNode *N) {
    return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
  };

  auto isContractableAndReassociableFMUL = [isContractableFMUL,
                                            isReassociable](SDValue N) {
    return isContractableFMUL(N) && isReassociable(N.getNode());
  };

  // Recognizes an already-fused multiply-add node.
  auto isFusedOp = [&](SDValue N) {
    unsigned Opcode = N.getOpcode();
    return Opcode == ISD::FMA || Opcode == ISD::FMAD;
  };

  // More folding opportunities when target permits.
  if (Aggressive && isReassociable(N)) {
    bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y (fma u, v, (fneg z)))
    if (CanFuse && isFusedOp(N0) &&
        isContractableAndReassociableFMUL(N0.getOperand(2)) &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
                         N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT, N1)));
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (CanFuse && isFusedOp(N1) &&
        isContractableAndReassociableFMUL(N1.getOperand(2)) &&
        N1->hasOneUse() && NoSignedZero) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(
          PreferredFusedOpcode, SL, VT,
          DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
          DAG.getNode(PreferredFusedOpcode, SL, VT,
                      DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
    }

    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
    if (isFusedOp(N0) && N0->hasOneUse()) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableAndReassociableFMUL(N020) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N020.getValueType())) {
          return DAG.getNode(
              PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
              DAG.getNode(
                  PreferredFusedOpcode, SL, VT,
                  DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
                  DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
                  DAG.getNode(ISD::FNEG, SL, VT, N1)));
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isFusedOp(N00)) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableAndReassociableFMUL(N002) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N00.getValueType())) {
          return DAG.getNode(
              PreferredFusedOpcode, SL, VT,
              DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
              DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
              DAG.getNode(
                  PreferredFusedOpcode, SL, VT,
                  DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
                  DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
                  DAG.getNode(ISD::FNEG, SL, VT, N1)));
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
        N1->hasOneUse()) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableAndReassociableFMUL(N120) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(
            PreferredFusedOpcode, SL, VT,
            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
            DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FNEG, SL, VT,
                                    DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableAndReassociableFMUL(N102) &&
          TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                              CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(
            PreferredFusedOpcode, SL, VT,
            DAG.getNode(ISD::FNEG, SL, VT,
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
            DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FNEG, SL, VT,
                                    DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
                        DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
      }
    }
  }

  // No applicable fold.
  return SDValue();
}
13976 
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
///
/// Returns the fused replacement node, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
  if (!hasNoInfs(Options, FAdd))
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  // NOTE(review): continuation line(s) of this initializer appear to be
  // missing here (extraction artifact) -- presumably a target-profitability
  // query; verify against the upstream source.
  bool HasFMA =
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isFMADLegal(DAG, N));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  // Without aggressive fusion the FADD must be single-use so we do not
  // duplicate work.
  auto FuseFADD = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
      }
    }
    return SDValue();
  };

  // FMUL is commutative, so try both operand orders.
  if (SDValue FMA = FuseFADD(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      // Constant on the LHS of the FSUB: (1 - x) / (-1 - x) forms.
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
      }
      // Constant on the RHS of the FSUB: (x - 1) / (x + 1) forms.
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y));
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y);
      }
    }
    return SDValue();
  };

  if (SDValue FMA = FuseFSUB(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0))
    return FMA;

  return SDValue();
}
14071 
/// Combine an ISD::FADD node: constant folding, canonicalization, algebraic
/// identities gated on fast-math flags, and finally FADD -> FMA fusion.
/// Returns the replacement value or an empty SDValue if nothing applies.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Whether each operand is a (build-vector-of-)FP-constant.
  bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (fadd c1, c2) -> c1 + c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // Only when the negation is strictly cheaper, and FSUB is usable.
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
    if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
            N1, DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
    if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
            N0, DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);

  // Matches a single-use (fmul B, -2.0) where -2.0 is a (splat) constant.
  auto isFMulNegTwo = [](SDValue FMul) {
    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
      return false;
    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
    return C && C->isExactlyValue(-2.0);
  };

  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N0)) {
    SDValue B = N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
  }
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N1)) {
    SDValue B = N1.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If nnan is enabled, fold lots of things.
  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    // NOTE(review): a continuation line of this condition appears to be
    // missing here (extraction artifact); verify against the upstream
    // source before relying on this fold's guard.
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT));
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT, N1,
                             DAG.getConstantFP(3.0, DL, VT));
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT, N0,
                             DAG.getConstantFP(3.0, DL, VT));
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT));
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
14252 
14253 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14254  SDValue Chain = N->getOperand(0);
14255  SDValue N0 = N->getOperand(1);
14256  SDValue N1 = N->getOperand(2);
14257  EVT VT = N->getValueType(0);
14258  EVT ChainVT = N->getValueType(1);
14259  SDLoc DL(N);
14260  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14261 
14262  // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
14263  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14264  if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14265  N1, DAG, LegalOperations, ForCodeSize)) {
14266  return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14267  {Chain, N0, NegN1});
14268  }
14269 
14270  // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14271  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14272  if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14273  N0, DAG, LegalOperations, ForCodeSize)) {
14274  return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14275  {Chain, N1, NegN0});
14276  }
14277  return SDValue();
14278 }
14279 
14280 SDValue DAGCombiner::visitFSUB(SDNode *N) {
14281  SDValue N0 = N->getOperand(0);
14282  SDValue N1 = N->getOperand(1);
14283  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
14284  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14285  EVT VT = N->getValueType(0);
14286  SDLoc DL(N);
14287  const TargetOptions &Options = DAG.getTarget().Options;
14288  const SDNodeFlags Flags = N->getFlags();
14289  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14290 
14291  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14292  return R;
14293 
14294  // fold (fsub c1, c2) -> c1-c2
14295  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14296  return C;
14297 
14298  // fold vector ops
14299  if (VT.isVector())
14300  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14301  return FoldedVOp;
14302 
14303  if (SDValue NewSel = foldBinOpIntoSelect(N))
14304  return NewSel;
14305 
14306  // (fsub A, 0) -> A
14307  if (N1CFP && N1CFP->isZero()) {
14308  if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14309  Flags.hasNoSignedZeros()) {
14310  return N0;
14311  }
14312  }
14313 
14314  if (N0 == N1) {
14315  // (fsub x, x) -> 0.0
14316  if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14317  return DAG.getConstantFP(0.0f, DL, VT);
14318  }
14319 
14320  // (fsub -0.0, N1) -> -N1
14321  if (N0CFP && N0CFP->isZero()) {
14322  if (N0CFP->isNegative() ||
14323  (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14324  // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14325  // flushed to zero, unless all users treat denorms as zero (DAZ).
14326  // FIXME: This transform will change the sign of a NaN and the behavior
14327  // of a signaling NaN. It is only valid when a NoNaN flag is present.
14328  DenormalMode DenormMode = DAG.getDenormalMode(VT);
14329  if (DenormMode == DenormalMode::getIEEE()) {
14330  if (SDValue NegN1 =
14331  TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14332  return NegN1;
14333  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14334  return DAG.getNode(ISD::FNEG, DL, VT, N1);
14335  }
14336  }
14337  }
14338 
14339  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14340  (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14341  N1.getOpcode() == ISD::FADD) {
14342  // X - (X + Y) -> -Y
14343  if (N0 == N1->getOperand(0))
14344  return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14345  // X - (Y + X) -> -Y
14346  if (N0 == N1->getOperand(1))
14347  return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14348  }
14349 
14350  // fold (fsub A, (fneg B)) -> (fadd A, B)
14351  if (SDValue NegN1 =
14352  TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14353  return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14354 
14355  // FSUB -> FMA combines:
14356  if (SDValue Fused = visitFSUBForFMACombine(N)) {
14357  AddToWorklist(Fused.getNode());
14358  return Fused;
14359  }
14360 
14361  return SDValue();
14362 }
14363 
/// Combine an ISD::FMUL node: constant folding, canonicalization,
/// reassociation folds under fast-math, strength reductions (x*2 -> x+x,
/// x*-1 -> 0-x), double-negation removal, select-based fabs matching, and
/// finally FMUL -> FMA distributive fusion.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();
  // New nodes inherit N's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (fmul c1, c2) -> c1*c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  // NOTE(review): the guarding condition line(s) of this fold appear to be
  // missing here (extraction artifact); verify against the upstream source.
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    // NOTE(review): the first line of this condition appears to be missing
    // (extraction artifact).
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      // NOTE(review): a continuation line of this condition appears to be
      // missing (extraction artifact).
      if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0);

  // fold (fmul X, -1.0) -> (fsub -0.0, X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
    if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
      return DAG.getNode(ISD::FSUB, DL, VT,
                         DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
    }
  }

  // -N0 * -N1 --> N0 * N1
  // NOTE(review): the negatibility-cost declarations consumed below (CostN0 /
  // CostN1) appear to be missing here (extraction artifact).
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  // NOTE(review): the cost-comparison continuation of this condition appears
  // to be missing (extraction artifact).
  if (NegN0 && NegN1 &&
    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so that the SELECT is in `Select` and the other operand in X.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Require a setcc comparing X against 0.0 with constant select arms.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        // Normalize "less than" to the "greater than" handling below by
        // swapping the select arms.
        std::swap(TrueOpnd, FalseOpnd);
        // NOTE(review): a fallthrough marker appears to be missing here
        // (extraction artifact); the swap is presumably intended to fall
        // through into the cases below -- verify against upstream.
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                             DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
14499 
/// Combine an ISD::FMA node: constant folding, double-negation removal,
/// identities for 0/±1 multiplicands, reassociation folds under fast-math,
/// and sinking a negation through the whole FMA when profitable.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  // Scalar-only constant views of the multiplicands (not splat-aware).
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // FMA nodes have flags that propagate to the created nodes.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Reassociation-style folds below accept either the global option or the
  // per-node reassoc fast-math flag.
  bool UnsafeFPMath =
      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
  // NOTE(review): the negatibility-cost declarations consumed below (CostN0 /
  // CostN1) appear to be missing here (extraction artifact).
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  // NOTE(review): the cost-comparison continuation of this condition appears
  // to be missing (extraction artifact).
  if (NegN0 && NegN1 &&
    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);

  // A zero multiplicand collapses the FMA to its addend; only valid under
  // unsafe/reassoc math (ignores NaN/Inf propagation and signed zeros).
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }

  // A unit multiplicand turns the FMA into a plain FADD.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  // NOTE(review): the guarding condition line(s) appear to be missing here
  // (extraction artifact).
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    // NOTE(review): a continuation line of this condition appears to be
    // missing (extraction artifact).
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    // NOTE(review): a continuation line of this condition appears to be
    // missing (extraction artifact).
    if (N0.getOpcode() == ISD::FMUL &&
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x -K, y
    // Profitable when the negated constant is free (legal ConstantFP) or
    // the original immediate was not legal anyway and K has one use.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
                                              ForCodeSize)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(
          ISD::FMUL, DL, VT, N0,
          DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(
          ISD::FMUL, DL, VT, N0,
          DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
    }
  }

  // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
  // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
  if (!TLI.isFNegFree(VT))
    if (SDValue Neg = TLI.getCheaperNegatedExpression(
            SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
      return DAG.getNode(ISD::FNEG, DL, VT, Neg);
  return SDValue();
}
14618 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  //       least 1 extra instruction. But the perf win may be substantial enough
  //       that only minsize should restrict this.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  // Requires pre-legalization and a reciprocal-permitting context (global
  // unsafe math or this node's arcp flag).
  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorMinNumElements();

  // First a cheap check against the divisor's raw use count before walking
  // the use list.
  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  // NOTE(review): the declaration of `Users` (presumably a SetVector of
  // SDNode*) appears to be missing here (extraction artifact); verify
  // against the upstream source.
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
      if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
          U->getOperand(0) == U->getOperand(1).getOperand(0) &&
          U->getFlags().hasAllowReassociation() &&
          U->getFlags().hasNoSignedZeros())
        continue;

      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0); // N was replaced.
}
14698 
14699 SDValue DAGCombiner::visitFDIV(SDNode *N) {
14700  SDValue N0 = N->getOperand(0);
14701  SDValue N1 = N->getOperand(1);
14702  EVT VT = N->getValueType(0);
14703  SDLoc DL(N);
14704  const TargetOptions &Options = DAG.getTarget().Options;
14705  SDNodeFlags Flags = N->getFlags();
14706  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14707 
14708  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14709  return R;
14710 
14711  // fold (fdiv c1, c2) -> c1/c2
14712  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
14713  return C;
14714 
14715  // fold vector ops
14716  if (VT.isVector())
14717  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14718  return FoldedVOp;
14719 
14720  if (SDValue NewSel = foldBinOpIntoSelect(N))
14721  return NewSel;
14722 
14724  return V;
14725 
14726  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14727  // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14728  if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14729  // Compute the reciprocal 1.0 / c2.
14730  const APFloat &N1APF = N1CFP->getValueAPF();
14731  APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14732  APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14733  // Only do the transform if the reciprocal is a legal fp immediate that
14734  // isn't too nasty (eg NaN, denormal, ...).
14735  if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14736  (!LegalOperations ||
14737  // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14738  // backend)... we should handle this gracefully after Legalize.
14739  // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14740  TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14741  TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14742  return DAG.getNode(ISD::FMUL, DL, VT, N0,
14743  DAG.getConstantFP(Recip, DL, VT));
14744  }
14745 
14746  // If this FDIV is part of a reciprocal square root, it may be folded
14747  // into a target-specific square root estimate instruction.
14748  if (N1.getOpcode() == ISD::FSQRT) {
14749  if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14750  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14751  } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14752  N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14753  if (SDValue RV =
14754  buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14755  RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14756  AddToWorklist(RV.getNode());
14757  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14758  }
14759  } else if (N1.getOpcode() == ISD::FP_ROUND &&
14760  N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14761  if (SDValue RV =
14762  buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14763  RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14764  AddToWorklist(RV.getNode());
14765  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14766  }
14767  } else if (N1.getOpcode() == ISD::FMUL) {
14768  // Look through an FMUL. Even though this won't remove the FDIV directly,
14769  // it's still worthwhile to get rid of the FSQRT if possible.
14770  SDValue Sqrt, Y;
14771  if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14772  Sqrt = N1.getOperand(0);
14773  Y = N1.getOperand(1);
14774  } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14775  Sqrt = N1.getOperand(1);
14776  Y = N1.getOperand(0);
14777  }
14778  if (Sqrt.getNode()) {
14779  // If the other multiply operand is known positive, pull it into the
14780  // sqrt. That will eliminate the division if we convert to an estimate.
14781  if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14782  N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14783  SDValue A;
14784  if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14785  A = Y.getOperand(0);
14786  else if (Y == Sqrt.getOperand(0))
14787  A = Y;
14788  if (A) {
14789  // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14790  // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14791  SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14792  SDValue AAZ =
14793  DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14794  if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14795  return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14796 
14797  // Estimate creation failed. Clean up speculatively created nodes.
14798  recursivelyDeleteUnusedNodes(AAZ.getNode());
14799  }
14800  }
14801 
14802  // We found a FSQRT, so try to make this fold:
14803  // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14804  if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14805  SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14806  AddToWorklist(Div.getNode());
14807  return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14808  }
14809  }
14810  }
14811 
14812  // Fold into a reciprocal estimate and multiply instead of a real divide.
14813  if (Options.NoInfsFPMath || Flags.hasNoInfs())
14814  if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14815  return RV;
14816  }
14817 
14818  // Fold X/Sqrt(X) -> Sqrt(X)
14819  if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14820  (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14821  if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14822  return N1;
14823 
14824  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14829  SDValue NegN0 =
14830  TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14831  SDValue NegN1 =
14832  TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14833  if (NegN0 && NegN1 &&
14836  return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14837 
14838  return SDValue();
14839 }
14840 
14841 SDValue DAGCombiner::visitFREM(SDNode *N) {
14842  SDValue N0 = N->getOperand(0);
14843  SDValue N1 = N->getOperand(1);
14844  EVT VT = N->getValueType(0);
14845  SDNodeFlags Flags = N->getFlags();
14846  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14847 
14848  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14849  return R;
14850 
14851  // fold (frem c1, c2) -> fmod(c1,c2)
14852  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
14853  return C;
14854 
14855  if (SDValue NewSel = foldBinOpIntoSelect(N))
14856  return NewSel;
14857 
14858  return SDValue();
14859 }
14860 
14861 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14862  SDNodeFlags Flags = N->getFlags();
14863  const TargetOptions &Options = DAG.getTarget().Options;
14864 
14865  // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14866  // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14867  if (!Flags.hasApproximateFuncs() ||
14868  (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14869  return SDValue();
14870 
14871  SDValue N0 = N->getOperand(0);
14872  if (TLI.isFsqrtCheap(N0, DAG))
14873  return SDValue();
14874 
14875  // FSQRT nodes have flags that propagate to the created nodes.
14876  // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14877  // transform the fdiv, we may produce a sub-optimal estimate sequence
14878  // because the reciprocal calculation may not have to filter out a
14879  // 0.0 input.
14880  return buildSqrtEstimate(N0, Flags);
14881 }
14882 
14883 /// copysign(x, fp_extend(y)) -> copysign(x, y)
14884 /// copysign(x, fp_round(y)) -> copysign(x, y)
14886  SDValue N1 = N->getOperand(1);
14887  if ((N1.getOpcode() == ISD::FP_EXTEND ||
14888  N1.getOpcode() == ISD::FP_ROUND)) {
14889  EVT N1VT = N1->getValueType(0);
14890  EVT N1Op0VT = N1->getOperand(0).getValueType();
14891 
14892  // Always fold no-op FP casts.
14893  if (N1VT == N1Op0VT)
14894  return true;
14895 
14896  // Do not optimize out type conversion of f128 type yet.
14897  // For some targets like x86_64, configuration is changed to keep one f128
14898  // value in one SSE register, but instruction selection cannot handle
14899  // FCOPYSIGN on SSE registers yet.
14900  if (N1Op0VT == MVT::f128)
14901  return false;
14902 
14903  // Avoid mismatched vector operand types, for better instruction selection.
14904  if (N1Op0VT.isVector())
14905  return false;
14906 
14907  return true;
14908  }
14909  return false;
14910 }
14911 
14912 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14913  SDValue N0 = N->getOperand(0);
14914  SDValue N1 = N->getOperand(1);
14915  EVT VT = N->getValueType(0);
14916 
14917  // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
14918  if (SDValue C =
14919  DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
14920  return C;
14921 
14922  if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14923  const APFloat &V = N1C->getValueAPF();
14924  // copysign(x, c1) -> fabs(x) iff ispos(c1)
14925  // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14926  if (!V.isNegative()) {
14927  if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14928  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14929  } else {
14930  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14931  return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14932  DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14933  }
14934  }
14935 
14936  // copysign(fabs(x), y) -> copysign(x, y)
14937  // copysign(fneg(x), y) -> copysign(x, y)
14938  // copysign(copysign(x,z), y) -> copysign(x, y)
14939  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14940  N0.getOpcode() == ISD::FCOPYSIGN)
14941  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14942 
14943  // copysign(x, abs(y)) -> abs(x)
14944  if (N1.getOpcode() == ISD::FABS)
14945  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14946 
14947  // copysign(x, copysign(y,z)) -> copysign(x, z)
14948  if (N1.getOpcode() == ISD::FCOPYSIGN)
14949  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14950 
14951  // copysign(x, fp_extend(y)) -> copysign(x, y)
14952  // copysign(x, fp_round(y)) -> copysign(x, y)
14954  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14955 
14956  return SDValue();
14957 }
14958 
14959 SDValue DAGCombiner::visitFPOW(SDNode *N) {
14960  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14961  if (!ExponentC)
14962  return SDValue();
14963  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14964 
14965  // Try to convert x ** (1/3) into cube root.
14966  // TODO: Handle the various flavors of long double.
14967  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14968  // Some range near 1/3 should be fine.
14969  EVT VT = N->getValueType(0);
14970  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14971  (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14972  // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14973  // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14974  // pow(-val, 1/3) = nan; cbrt(-val) = -num.
14975  // For regular numbers, rounding may cause the results to differ.
14976  // Therefore, we require { nsz ninf nnan afn } for this transform.
14977  // TODO: We could select out the special cases if we don't have nsz/ninf.
14978  SDNodeFlags Flags = N->getFlags();
14979  if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14980  !Flags.hasApproximateFuncs())
14981  return SDValue();
14982 
14983  // Do not create a cbrt() libcall if the target does not have it, and do not
14984  // turn a pow that has lowering support into a cbrt() libcall.
14985  if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14988  return SDValue();
14989 
14990  return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14991  }
14992 
14993  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14994  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14995  // TODO: This could be extended (using a target hook) to handle smaller
14996  // power-of-2 fractional exponents.
14997  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14998  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14999  if (ExponentIs025 || ExponentIs075) {
15000  // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
15001  // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
15002  // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
15003  // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
15004  // For regular numbers, rounding may cause the results to differ.
15005  // Therefore, we require { nsz ninf afn } for this transform.
15006  // TODO: We could select out the special cases if we don't have nsz/ninf.
15007  SDNodeFlags Flags = N->getFlags();
15008 
15009  // We only need no signed zeros for the 0.25 case.
15010  if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
15011  !Flags.hasApproximateFuncs())
15012  return SDValue();
15013 
15014  // Don't double the number of libcalls. We are trying to inline fast code.
15016  return SDValue();
15017 
15018  // Assume that libcalls are the smallest code.
15019  // TODO: This restriction should probably be lifted for vectors.
15020  if (ForCodeSize)
15021  return SDValue();
15022 
15023  // pow(X, 0.25) --> sqrt(sqrt(X))
15024  SDLoc DL(N);
15025  SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
15026  SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
15027  if (ExponentIs025)
15028  return SqrtSqrt;
15029  // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
15030  return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
15031  }
15032 
15033  return SDValue();
15034 }
15035 
15037  const TargetLowering &TLI) {
15038  // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
15039  // replacing casts with a libcall. We also must be allowed to ignore -0.0
15040  // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
15041  // conversions would return +0.0.
15042  // FIXME: We should be able to use node-level FMF here.
15043  // TODO: If strict math, should we use FABS (+ range check for signed cast)?
15044  EVT VT = N->getValueType(0);
15045  if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
15047  return SDValue();
15048 
15049  // fptosi/fptoui round towards zero, so converting from FP to integer and
15050  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
15051  SDValue N0 = N->getOperand(0);
15052  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
15053  N0.getOperand(0).getValueType() == VT)
15054  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15055 
15056  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
15057  N0.getOperand(0).getValueType() == VT)
15058  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15059 
15060  return SDValue();
15061 }
15062 
15063 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
15064  SDValue N0 = N->getOperand(0);
15065  EVT VT = N->getValueType(0);
15066  EVT OpVT = N0.getValueType();
15067 
15068  // [us]itofp(undef) = 0, because the result value is bounded.
15069  if (N0.isUndef())
15070  return DAG.getConstantFP(0.0, SDLoc(N), VT);
15071 
15072  // fold (sint_to_fp c1) -> c1fp
15074  // ...but only if the target supports immediate floating-point values
15075  (!LegalOperations ||
15077  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15078 
15079  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
15080  // but UINT_TO_FP is legal on this target, try to convert.
15081  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
15082  hasOperation(ISD::UINT_TO_FP, OpVT)) {
15083  // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
15084  if (DAG.SignBitIsZero(N0))
15085  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15086  }
15087 
15088  // The next optimizations are desirable only if SELECT_CC can be lowered.
15089  // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
15090  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
15091  !VT.isVector() &&
15092  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15093  SDLoc DL(N);
15094  return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
15095  DAG.getConstantFP(0.0, DL, VT));
15096  }
15097 
15098  // fold (sint_to_fp (zext (setcc x, y, cc))) ->
15099  // (select (setcc x, y, cc), 1.0, 0.0)
15100  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
15101  N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
15102  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15103  SDLoc DL(N);
15104  return DAG.getSelect(DL, VT, N0.getOperand(0),
15105  DAG.getConstantFP(1.0, DL, VT),
15106  DAG.getConstantFP(0.0, DL, VT));
15107  }
15108 
15109  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15110  return FTrunc;
15111 
15112  return SDValue();
15113 }
15114 
15115 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
15116  SDValue N0 = N->getOperand(0);
15117  EVT VT = N->getValueType(0);
15118  EVT OpVT = N0.getValueType();
15119 
15120  // [us]itofp(undef) = 0, because the result value is bounded.
15121  if (N0.isUndef())
15122  return DAG.getConstantFP(0.0, SDLoc(N), VT);
15123 
15124  // fold (uint_to_fp c1) -> c1fp
15126  // ...but only if the target supports immediate floating-point values
15127  (!LegalOperations ||
15129  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15130 
15131  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15132  // but SINT_TO_FP is legal on this target, try to convert.
15133  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
15134  hasOperation(ISD::SINT_TO_FP, OpVT)) {
15135  // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15136  if (DAG.SignBitIsZero(N0))
15137  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15138  }
15139 
15140  // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15141  if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15142  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15143  SDLoc DL(N);
15144  return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15145  DAG.getConstantFP(0.0, DL, VT));
15146  }
15147 
15148  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15149  return FTrunc;
15150 
15151  return SDValue();
15152 }
15153 
15154 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
15156  SDValue N0 = N->getOperand(0);
15157  EVT VT = N->getValueType(0);
15158 
15159  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15160  return SDValue();
15161 
15162  SDValue Src = N0.getOperand(0);
15163  EVT SrcVT = Src.getValueType();
15164  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
15165  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15166 
15167  // We can safely assume the conversion won't overflow the output range,
15168  // because (for example) (uint8_t)18293.f is undefined behavior.
15169 
15170  // Since we can assume the conversion won't overflow, our decision as to
15171  // whether the input will fit in the float should depend on the minimum
15172  // of the input range and output range.
15173 
15174  // This means this is also safe for a signed input and unsigned output, since
15175  // a negative input would lead to undefined behavior.
15176  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15177  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
15178  unsigned ActualSize = std::min(InputSize, OutputSize);
15179  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
15180 
15181  // We can only fold away the float conversion if the input range can be
15182  // represented exactly in the float range.
15183  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
15184  if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15185  unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
15186  : ISD::ZERO_EXTEND;
15187  return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15188  }
15189  if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15190  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
15191  return DAG.getBitcast(VT, Src);
15192  }
15193  return SDValue();
15194 }
15195 
15196 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15197  SDValue N0 = N->getOperand(0);
15198  EVT VT = N->getValueType(0);
15199 
15200  // fold (fp_to_sint undef) -> undef
15201  if (N0.isUndef())
15202  return DAG.getUNDEF(VT);
15203 
15204  // fold (fp_to_sint c1fp) -> c1
15206  return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15207 
15208  return FoldIntToFPToInt(N, DAG);
15209 }
15210 
15211 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15212  SDValue N0 = N->getOperand(0);
15213  EVT VT = N->getValueType(0);
15214 
15215  // fold (fp_to_uint undef) -> undef
15216  if (N0.isUndef())
15217  return DAG.getUNDEF(VT);
15218 
15219  // fold (fp_to_uint c1fp) -> c1
15221  return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15222 
15223  return FoldIntToFPToInt(N, DAG);
15224 }
15225 
// Combine an FP_ROUND node: constant-fold, collapse extend/round pairs,
// merge double roundings, and hoist the round above a one-use copysign.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  // Operand 1 is the "trunc" flag: a value of 1 asserts this round is known
  // to be value-preserving (no precision is actually lost).
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  // Re-emitting the node with a constant operand lets getNode constant-fold.
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  // Only when the round returns to exactly the pre-extend type.
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64. Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The merged round is a trunc only if both original rounds were.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding only the magnitude is valid because copysign re-applies the
  // sign afterwards; restricted to one use so the copysign node dies.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  // Try to push the round through a vselect whose arms are size-changing
  // casts (see matchVSelectOpSizesWithSetCC for the exact pattern).
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
15281 
15282 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15283  SDValue N0 = N->getOperand(0);
15284  EVT VT = N->getValueType(0);
15285 
15286  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
15287  if (N->hasOneUse() &&
15288  N->use_begin()->getOpcode() == ISD::FP_ROUND)
15289  return SDValue();
15290 
15291  // fold (fp_extend c1fp) -> c1fp
15293  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15294 
15295  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15296  if (N0.getOpcode() == ISD::FP16_TO_FP &&
15298  return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15299 
15300  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
15301  // value of X.
15302  if (N0.getOpcode() == ISD::FP_ROUND
15303  && N0.getConstantOperandVal(1) == 1) {
15304  SDValue In = N0.getOperand(0);
15305  if (In.getValueType() == VT) return In;
15306  if (VT.bitsLT(In.getValueType()))
15307  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15308  In, N0.getOperand(1));
15309  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15310  }
15311 
15312  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
15313  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15315  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15316  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
15317  LN0->getChain(),
15318  LN0->getBasePtr(), N0.getValueType(),
15319  LN0->getMemOperand());
15320  CombineTo(N, ExtLoad);
15321  CombineTo(N0.getNode(),
15322  DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
15323  N0.getValueType(), ExtLoad,
15324  DAG.getIntPtrConstant(1, SDLoc(N0))),
15325  ExtLoad.getValue(1));
15326  return SDValue(N, 0); // Return N so it doesn't get rechecked!
15327  }
15328 
15329  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15330  return NewVSel;
15331 
15332  return SDValue();
15333 }
15334 
15335 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15336  SDValue N0 = N->getOperand(0);
15337  EVT VT = N->getValueType(0);
15338 
15339  // fold (fceil c1) -> fceil(c1)
15341  return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
15342 
15343  return SDValue();
15344 }
15345 
15346 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15347  SDValue N0 = N->getOperand(0);
15348  EVT VT = N->getValueType(0);
15349 
15350  // fold (ftrunc c1) -> ftrunc(c1)
15352  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15353 
15354  // fold ftrunc (known rounded int x) -> x
15355  // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
15356  // likely to be generated to extract integer from a rounded floating value.
15357  switch (N0.getOpcode()) {
15358  default: break;
15359  case ISD::FRINT:
15360  case ISD::FTRUNC:
15361  case ISD::FNEARBYINT:
15362  case ISD::FFLOOR:
15363  case ISD::FCEIL:
15364  return N0;
15365  }
15366 
15367  return SDValue();
15368 }
15369 
15370 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15371  SDValue N0 = N->getOperand(0);
15372  EVT VT = N->getValueType(0);
15373 
15374  // fold (ffloor c1) -> ffloor(c1)
15376  return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
15377 
15378  return SDValue();
15379 }
15380 
15381 SDValue DAGCombiner::visitFNEG(SDNode *N) {
15382  SDValue N0 = N->getOperand(0);
15383  EVT VT = N->getValueType(0);
15384  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15385 
15386  // Constant fold FNEG.
15388  return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
15389 
15390  if (SDValue NegN0 =
15391  TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15392  return NegN0;
15393 
15394  // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15395  // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15396  // know it was called from a context with a nsz flag if the input fsub does
15397  // not.
15398  if (N0.getOpcode() == ISD::FSUB &&
15400  N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15401  return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15402  N0.getOperand(0));
15403  }
15404 
15405  if (SDValue Cast = foldSignChangeInBitcast(N))
15406  return Cast;
15407 
15408  return SDValue();
15409 }
15410 
15411 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15412  SDValue N0 = N->getOperand(0);
15413  SDValue N1 = N->getOperand(1);
15414  EVT VT = N->getValueType(0);
15415  const SDNodeFlags Flags = N->getFlags();
15416  unsigned Opc = N->getOpcode();
15417  bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15418  bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15419  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15420 
15421  // Constant fold.
15422  if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15423  return C;
15424 
15425  // Canonicalize to constant on RHS.
15428  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15429 
15430  if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
15431  const APFloat &AF = N1CFP->getValueAPF();
15432 
15433  // minnum(X, nan) -> X
15434  // maxnum(X, nan) -> X
15435  // minimum(X, nan) -> nan
15436  // maximum(X, nan) -> nan
15437  if (AF.isNaN())
15438  return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15439 
15440  // In the following folds, inf can be replaced with the largest finite
15441  // float, if the ninf flag is set.
15442  if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15443  // minnum(X, -inf) -> -inf
15444  // maxnum(X, +inf) -> +inf
15445  // minimum(X, -inf) -> -inf if nnan
15446  // maximum(X, +inf) -> +inf if nnan
15447  if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15448  return N->getOperand(1);
15449 
15450  // minnum(X, +inf) -> X if nnan
15451  // maxnum(X, -inf) -> X if nnan
15452  // minimum(X, +inf) -> X
15453  // maximum(X, -inf) -> X
15454  if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15455  return N->getOperand(0);
15456  }
15457  }
15458 
15459  return SDValue();
15460 }
15461 
15462 SDValue DAGCombiner::visitFABS(SDNode *N) {
15463  SDValue N0 = N->getOperand(0);
15464  EVT VT = N->getValueType(0);
15465 
15466  // fold (fabs c1) -> fabs(c1)
15468  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15469 
15470  // fold (fabs (fabs x)) -> (fabs x)
15471  if (N0.getOpcode() == ISD::FABS)
15472  return N->getOperand(0);
15473 
15474  // fold (fabs (fneg x)) -> (fabs x)
15475  // fold (fabs (fcopysign x, y)) -> (fabs x)
15476  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15477  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15478 
15479  if (SDValue Cast = foldSignChangeInBitcast(N))
15480  return Cast;
15481 
15482  return SDValue();
15483 }
15484 
15485 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15486  SDValue Chain = N->getOperand(0);
15487  SDValue N1 = N->getOperand(1);
15488  SDValue N2 = N->getOperand(2);
15489 
15490  // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15491  // nondeterministic jumps).
15492  if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15493  return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15494  N1->getOperand(0), N2);
15495  }
15496 
15497  // If N is a constant we could fold this into a fallthrough or unconditional
15498  // branch. However that doesn't happen very often in normal code, because
15499  // Instcombine/SimplifyCFG should have handled the available opportunities.
15500  // If we did this folding here, it would be necessary to update the
15501  // MachineBasicBlock CFG, which is awkward.
15502 
15503  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15504  // on the target.
15505  if (N1.getOpcode() == ISD::SETCC &&
15507  N1.getOperand(0).getValueType())) {
15508  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15509  Chain, N1.getOperand(2),
15510  N1.getOperand(0), N1.getOperand(1), N2);
15511  }
15512 
15513  if (N1.hasOneUse()) {
15514  // rebuildSetCC calls visitXor which may change the Chain when there is a
15515  // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15516  HandleSDNode ChainHandle(Chain);
15517  if (SDValue NewN1 = rebuildSetCC(N1))
15518  return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15519  ChainHandle.getValue(), NewN1, N2);
15520  }
15521 
15522  return SDValue();
15523 }
15524 
15525 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15526  if (N.getOpcode() == ISD::SRL ||
15527  (N.getOpcode() == ISD::TRUNCATE &&
15528  (N.getOperand(0).hasOneUse() &&
15529  N.getOperand(0).getOpcode() == ISD::SRL))) {
15530  // Look pass the truncate.
15531  if (N.getOpcode() == ISD::TRUNCATE)
15532  N = N.getOperand(0);
15533 
15534  // Match this pattern so that we can generate simpler code:
15535  //
15536  // %a = ...
15537  // %b = and i32 %a, 2
15538  // %c = srl i32 %b, 1
15539  // brcond i32 %c ...
15540  //
15541  // into
15542  //
15543  // %a = ...
15544  // %b = and i32 %a, 2
15545  // %c = setcc eq %b, 0
15546  // brcond %c ...
15547  //
15548  // This applies only when the AND constant value has one bit set and the
15549  // SRL constant is equal to the log2 of the AND constant. The back-end is
15550  // smart enough to convert the result into a TEST/JMP sequence.
15551  SDValue Op0 = N.getOperand(0);
15552  SDValue Op1 = N.getOperand(1);
15553 
15554  if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15555  SDValue AndOp1 = Op0.getOperand(1);
15556 
15557  if (AndOp1.getOpcode() == ISD::Constant) {
15558  const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15559 
15560  if (AndConst.isPowerOf2() &&
15561  cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15562  SDLoc DL(N);
15563  return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15564  Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15565  ISD::SETNE);
15566  }
15567  }
15568  }
15569  }
15570 
15571  // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15572  // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15573  if (N.getOpcode() == ISD::XOR) {
15574  // Because we may call this on a speculatively constructed
15575  // SimplifiedSetCC Node, we need to simplify this node first.
15576  // Ideally this should be folded into SimplifySetCC and not
15577  // here. For now, grab a handle to N so we don't lose it from
15578  // replacements interal to the visit.
15579  HandleSDNode XORHandle(N);
15580  while (N.getOpcode() == ISD::XOR) {
15581  SDValue Tmp = visitXOR(N.getNode());
15582  // No simplification done.
15583  if (!Tmp.getNode())
15584  break;
15585  // Returning N is form in-visit replacement that may invalidated
15586  // N. Grab value from Handle.
15587  if (Tmp.getNode() == N.getNode())
15588  N = XORHandle.getValue();
15589  else // Node simplified. Try simplifying again.
15590  N = Tmp;
15591  }
15592 
15593  if (N.getOpcode() != ISD::XOR)
15594  return N;
15595 
15596  SDValue Op0 = N->getOperand(0);
15597  SDValue Op1 = N->getOperand(1);
15598 
15599  if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15600  bool Equal = false;
15601  // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15602  if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15603  Op0.getValueType() == MVT::i1) {
15604  N = Op0;
15605  Op0 = N->getOperand(0);
15606  Op1 = N->getOperand(1);
15607  Equal = true;
15608  }
15609 
15610  EVT SetCCVT = N.getValueType();
15611  if (LegalTypes)
15612  SetCCVT = getSetCCResultType(SetCCVT);
15613  // Replace the uses of XOR with SETCC
15614  return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15616  }
15617  }
15618 
15619  return SDValue();
15620 }
15621 
15622 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15623 //
15624 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15625  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15626  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15627 
15628  // If N is a constant we could fold this into a fallthrough or unconditional
15629  // branch. However that doesn't happen very often in normal code, because
15630  // Instcombine/SimplifyCFG should have handled the available opportunities.
15631  // If we did this folding here, it would be necessary to update the
15632  // MachineBasicBlock CFG, which is awkward.
15633 
15634  // Use SimplifySetCC to simplify SETCC's.
15636  CondLHS, CondRHS, CC->get(), SDLoc(N),
15637  false);
15638  if (Simp.getNode()) AddToWorklist(Simp.getNode());
15639 
15640  // fold to a simpler setcc
15641  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15642  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15643  N->getOperand(0), Simp.getOperand(2),
15644  Simp.getOperand(0), Simp.getOperand(1),
15645  N->getOperand(4));
15646 
15647  return SDValue();
15648 }
15649 
15650 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15651  bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15652  const TargetLowering &TLI) {
15653  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15654  if (LD->isIndexed())
15655  return false;
15656  EVT VT = LD->getMemoryVT();
15657  if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15658  return false;
15659  Ptr = LD->getBasePtr();
15660  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15661  if (ST->isIndexed())
15662  return false;
15663  EVT VT = ST->getMemoryVT();
15664  if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15665  return false;
15666  Ptr = ST->getBasePtr();
15667  IsLoad = false;
15668  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15669  if (LD->isIndexed())
15670  return false;
15671  EVT VT = LD->getMemoryVT();
15672  if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15673  !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15674  return false;
15675  Ptr = LD->getBasePtr();
15676  IsMasked = true;
15677  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15678  if (ST->isIndexed())
15679  return false;
15680  EVT VT = ST->getMemoryVT();
15681  if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15682  !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15683  return false;
15684  Ptr = ST->getBasePtr();
15685  IsLoad = false;
15686  IsMasked = true;
15687  } else {
15688  return false;
15689  }
15690  return true;
15691 }
15692 
15693 /// Try turning a load/store into a pre-indexed load/store when the base
15694 /// pointer is an add or subtract and it has other uses besides the load/store.
15695 /// After the transformation, the new indexed load/store has effectively folded
15696 /// the add/subtract in and all of its other uses are redirected to the
15697 /// new load/store.
15698 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15699  if (Level < AfterLegalizeDAG)
15700  return false;
15701 
15702  bool IsLoad = true;
15703  bool IsMasked = false;
15704  SDValue Ptr;
15705  if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15706  Ptr, TLI))
15707  return false;
15708 
15709  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15710  // out. There is no reason to make this a preinc/predec.
15711  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15712  Ptr.getNode()->hasOneUse())
15713  return false;
15714 
15715  // Ask the target to do addressing mode selection.
15716  SDValue BasePtr;
15717  SDValue Offset;
15719  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15720  return false;
15721 
15722  // Backends without true r+i pre-indexed forms may need to pass a
15723  // constant base with a variable offset so that constant coercion
15724  // will work with the patterns in canonical form.
15725  bool Swapped = false;
15726  if (isa<ConstantSDNode>(BasePtr)) {
15727  std::swap(BasePtr, Offset);
15728  Swapped = true;
15729  }
15730 
15731  // Don't create a indexed load / store with zero offset.
15732  if (isNullConstant(Offset))
15733  return false;
15734 
15735  // Try turning it into a pre-indexed load / store except when:
15736  // 1) The new base ptr is a frame index.
15737  // 2) If N is a store and the new base ptr is either the same as or is a
15738  // predecessor of the value being stored.
15739  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15740  // that would create a cycle.
15741  // 4) All uses are load / store ops that use it as old base ptr.
15742 
15743  // Check #1. Preinc'ing a frame index would require copying the stack pointer
15744  // (plus the implicit offset) to a register to preinc anyway.
15745  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15746  return false;
15747 
15748  // Check #2.
15749  if (!IsLoad) {
15750  SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15751  : cast<StoreSDNode>(N)->getValue();
15752 
15753  // Would require a copy.
15754  if (Val == BasePtr)
15755  return false;
15756 
15757  // Would create a cycle.
15758  if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15759  return false;
15760  }
15761 
15762  // Caches for hasPredecessorHelper.
15765  Worklist.push_back(N);
15766 
15767  // If the offset is a constant, there may be other adds of constants that
15768  // can be folded with this one. We should do this to avoid having to keep
15769  // a copy of the original base pointer.
15770  SmallVector<SDNode *, 16> OtherUses;
15771  if (isa<ConstantSDNode>(Offset))
15772  for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15773  UE = BasePtr.getNode()->use_end();
15774  UI != UE; ++UI) {
15775  SDUse &Use = UI.getUse();
15776  // Skip the use that is Ptr and uses of other results from BasePtr's
15777  // node (important for nodes that return multiple results).
15778  if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15779  continue;
15780 
15781  if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15782  continue;
15783 
15784  if (Use.getUser()->getOpcode() != ISD::ADD &&
15785  Use.getUser()->getOpcode() != ISD::SUB) {
15786  OtherUses.clear();
15787  break;
15788  }
15789 
15790  SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15791  if (!isa<ConstantSDNode>(Op1)) {
15792  OtherUses.clear();
15793  break;
15794  }
15795 
15796  // FIXME: In some cases, we can be smarter about this.
15797  if (Op1.getValueType() != Offset.getValueType()) {
15798  OtherUses.clear();
15799  break;
15800  }
15801 
15802  OtherUses.push_back(Use.getUser());
15803  }
15804 
15805  if (Swapped)
15806  std::swap(BasePtr, Offset);
15807 
15808  // Now check for #3 and #4.
15809  bool RealUse = false;
15810 
15811  for (SDNode *Use : Ptr.getNode()->uses()) {
15812  if (Use == N)
15813  continue;
15814  if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15815  return false;
15816 
15817  // If Ptr may be folded in addressing mode of other use, then it's
15818  // not profitable to do this transformation.
15819  if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15820  RealUse = true;
15821  }
15822 
15823  if (!RealUse)
15824  return false;
15825 
15826  SDValue Result;
15827  if (!IsMasked) {
15828  if (IsLoad)
15829  Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15830  else
15831  Result =
15832  DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15833  } else {
15834  if (IsLoad)
15835  Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15836  Offset, AM);
15837  else
15838  Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15839  Offset, AM);
15840  }
15841  ++PreIndexedNodes;
15842  ++NodesCombined;
15843  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15844  Result.getNode()->dump(&DAG); dbgs() << '\n');
15845  WorklistRemover DeadNodes(*this);
15846  if (IsLoad) {
15847  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15848  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15849  } else {
15850  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15851  }
15852 
15853  // Finally, since the node is now dead, remove it from the graph.
15854  deleteAndRecombine(N);
15855 
15856  if (Swapped)
15857  std::swap(BasePtr, Offset);
15858 
15859  // Replace other uses of BasePtr that can be updated to use Ptr
15860  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15861  unsigned OffsetIdx = 1;
15862  if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15863  OffsetIdx = 0;
15864  assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15865  BasePtr.getNode() && "Expected BasePtr operand");
15866 
15867  // We need to replace ptr0 in the following expression:
15868  // x0 * offset0 + y0 * ptr0 = t0
15869  // knowing that
15870  // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15871  //
15872  // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15873  // indexed load/store and the expression that needs to be re-written.
15874  //
15875  // Therefore, we have:
15876  // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
15877 
15878  auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15879  const APInt &Offset0 = CN->getAPIntValue();
15880  const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15881  int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15882  int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15883  int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15884  int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15885 
15886  unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15887 
15888  APInt CNV = Offset0;
15889  if (X0 < 0) CNV = -CNV;
15890  if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15891  else CNV = CNV - Offset1;
15892 
15893  SDLoc DL(OtherUses[i]);
15894 
15895  // We can now generate the new expression.
15896  SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15897  SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15898 
15899  SDValue NewUse = DAG.getNode(Opcode,
15900  DL,
15901  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15902  DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15903  deleteAndRecombine(OtherUses[i]);
15904  }
15905 
15906  // Replace the uses of Ptr with uses of the updated base value.
15907  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15908  deleteAndRecombine(Ptr.getNode());
15909  AddToWorklist(Result.getNode());
15910 
15911  return true;
15912 }
15913 
15914 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15915  SDValue &BasePtr, SDValue &Offset,
15916  ISD::MemIndexedMode &AM,
15917  SelectionDAG &DAG,
15918  const TargetLowering &TLI) {
15919  if (PtrUse == N ||
15920  (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15921  return false;
15922 
15923  if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15924  return false;
15925 
15926  // Don't create a indexed load / store with zero offset.
15927  if (isNullConstant(Offset))
15928  return false;
15929 
15930  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15931  return false;
15932 
15934  for (SDNode *Use : BasePtr.getNode()->uses()) {
15935  if (Use == Ptr.getNode())
15936  continue;
15937 
15938  // No if there's a later user which could perform the index instead.
15939  if (isa<MemSDNode>(Use)) {
15940  bool IsLoad = true;
15941  bool IsMasked = false;
15942  SDValue OtherPtr;
15944  IsMasked, OtherPtr, TLI)) {
15946  Worklist.push_back(Use);
15947  if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15948  return false;
15949  }
15950  }
15951 
15952  // If all the uses are load / store addresses, then don't do the
15953  // transformation.
15954  if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15955  for (SDNode *UseUse : Use->uses())
15956  if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15957  return false;
15958  }
15959  }
15960  return true;
15961 }
15962 
15963 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15964  bool &IsMasked, SDValue &Ptr,
15965  SDValue &BasePtr, SDValue &Offset,
15966  ISD::MemIndexedMode &AM,
15967  SelectionDAG &DAG,
15968  const TargetLowering &TLI) {
15970  IsMasked, Ptr, TLI) ||
15971  Ptr.getNode()->hasOneUse())
15972  return nullptr;
15973 
15974  // Try turning it into a post-indexed load / store except when
15975  // 1) All uses are load / store ops that use it as base ptr (and
15976  // it may be folded as addressing mmode).
15977  // 2) Op must be independent of N, i.e. Op is neither a predecessor
15978  // nor a successor of N. Otherwise, if Op is folded that would
15979  // create a cycle.
15980  for (SDNode *Op : Ptr->uses()) {
15981  // Check for #1.
15982  if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15983  continue;
15984 
15985  // Check for #2.
15988  // Ptr is predecessor to both N and Op.
15989  Visited.insert(Ptr.getNode());
15990  Worklist.push_back(N);
15991  Worklist.push_back(Op);
15992  if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15993  !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15994  return Op;
15995  }
15996  return nullptr;
15997 }
15998 
15999 /// Try to combine a load/store with a add/sub of the base pointer node into a
16000 /// post-indexed load/store. The transformation folded the add/subtract into the
16001 /// new indexed load/store effectively and all of its uses are redirected to the
16002 /// new load/store.
16003 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
16004  if (Level < AfterLegalizeDAG)
16005  return false;
16006 
16007  bool IsLoad = true;
16008  bool IsMasked = false;
16009  SDValue Ptr;
16010  SDValue BasePtr;
16011  SDValue Offset;
16013  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
16014  Offset, AM, DAG, TLI);
16015  if (!Op)
16016  return false;
16017 
16018  SDValue Result;
16019  if (!IsMasked)
16020  Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16021  Offset, AM)
16022  : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
16023  BasePtr, Offset, AM);
16024  else
16025  Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
16026  BasePtr, Offset, AM)
16027  : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
16028  BasePtr, Offset, AM);
16029  ++PostIndexedNodes;
16030  ++NodesCombined;
16031  LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
16032  dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
16033  dbgs() << '\n');
16034  WorklistRemover DeadNodes(*this);
16035  if (IsLoad) {
16036  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16037  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16038  } else {
16039  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16040  }
16041 
16042  // Finally, since the node is now dead, remove it from the graph.
16043  deleteAndRecombine(N);
16044 
16045  // Replace the uses of Use with uses of the updated base value.
16047  Result.getValue(IsLoad ? 1 : 0));
16048  deleteAndRecombine(Op);
16049  return true;
16050 }
16051 
16052 /// Return the base-pointer arithmetic from an indexed \p LD.
16053 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
16054  ISD::MemIndexedMode AM = LD->getAddressingMode();
16055  assert(AM != ISD::UNINDEXED);
16056  SDValue BP = LD->getOperand(1);
16057  SDValue Inc = LD->getOperand(2);
16058 
16059  // Some backends use TargetConstants for load offsets, but don't expect
16060  // TargetConstants in general ADD nodes. We can convert these constants into
16061  // regular Constants (if the constant is not opaque).
16062  assert((Inc.getOpcode() != ISD::TargetConstant ||
16063  !cast<ConstantSDNode>(Inc)->isOpaque()) &&
16064  "Cannot split out indexing using opaque target constants");
16065  if (Inc.getOpcode() == ISD::TargetConstant) {
16066  ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
16067  Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
16068  ConstInc->getValueType(0));
16069  }
16070 
16071  unsigned Opc =
16072  (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
16073  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
16074 }
16075 
16077  return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
16078 }
16079 
16080 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
16081  Val = ST->getValue();
16082  EVT STType = Val.getValueType();
16083  EVT STMemType = ST->getMemoryVT();
16084  if (STType == STMemType)
16085  return true;
16086  if (isTypeLegal(STMemType))
16087  return false; // fail.
16088  if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
16089  TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
16090  Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
16091  return true;
16092  }
16093  if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
16094  STType.isInteger() && STMemType.isInteger()) {
16095  Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
16096  return true;
16097  }
16098  if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
16099  Val = DAG.getBitcast(STMemType, Val);
16100  return true;
16101  }
16102  return false; // fail.
16103 }
16104 
16105 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
16106  EVT LDMemType = LD->getMemoryVT();
16107  EVT LDType = LD->getValueType(0);
16108  assert(Val.getValueType() == LDMemType &&
16109  "Attempting to extend value of non-matching type");
16110  if (LDType == LDMemType)
16111  return true;
16112  if (LDMemType.isInteger() && LDType.isInteger()) {
16113  switch (LD->getExtensionType()) {
16114  case ISD::NON_EXTLOAD:
16115  Val = DAG.getBitcast(LDType, Val);
16116  return true;
16117  case ISD::EXTLOAD:
16118  Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
16119  return true;
16120  case ISD::SEXTLOAD:
16121  Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
16122  return true;
16123  case ISD::ZEXTLOAD:
16124  Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
16125  return true;
16126  }
16127  }
16128  return false;
16129 }
16130 
16131 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
16132  if (OptLevel == CodeGenOpt::None || !LD->isSimple())
16133  return SDValue();
16134  SDValue Chain = LD->getOperand(0);
16135  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
16136  // TODO: Relax this restriction for unordered atomics (see D66309)
16137  if (!ST || !ST->isSimple())
16138  return SDValue();
16139 
16140  EVT LDType = LD->getValueType(0);
16141  EVT LDMemType = LD->getMemoryVT();
16142  EVT STMemType = ST->getMemoryVT();
16143  EVT STType = ST->getValue().getValueType();
16144 
16145  // There are two cases to consider here:
16146  // 1. The store is fixed width and the load is scalable. In this case we
16147  // don't know at compile time if the store completely envelops the load
16148  // so we abandon the optimisation.
16149  // 2. The store is scalable and the load is fixed width. We could
16150  // potentially support a limited number of cases here, but there has been
16151  // no cost-benefit analysis to prove it's worth it.
16152  bool LdStScalable = LDMemType.isScalableVector();
16153  if (LdStScalable != STMemType.isScalableVector())
16154  return SDValue();
16155 
16156  // If we are dealing with scalable vectors on a big endian platform the
16157  // calculation of offsets below becomes trickier, since we do not know at
16158  // compile time the absolute size of the vector. Until we've done more
16159  // analysis on big-endian platforms it seems better to bail out for now.
16160  if (LdStScalable && DAG.getDataLayout().isBigEndian())
16161  return SDValue();
16162 
16163  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
16164  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
16165  int64_t Offset;
16166  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
16167  return SDValue();
16168 
16169  // Normalize for Endianness. After this Offset=0 will denote that the least
16170  // significant bit in the loaded value maps to the least significant bit in
16171  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
16172  // n:th least significant byte of the stored value.
16173  if (DAG.getDataLayout().isBigEndian())
16174  Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
16175  (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
16176  8 -
16177  Offset;
16178 
16179  // Check that the stored value cover all bits that are loaded.
16180  bool STCoversLD;
16181 
16182  TypeSize LdMemSize = LDMemType.getSizeInBits();
16183  TypeSize StMemSize = STMemType.getSizeInBits();
16184  if (LdStScalable)
16185  STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
16186  else
16187  STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
16188  StMemSize.getFixedSize());
16189 
16190  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
16191  if (LD->isIndexed()) {
16192  // Cannot handle opaque target constants and we must respect the user's
16193  // request not to split indexes from loads.
16194  if (!canSplitIdx(LD))
16195  return SDValue();
16196  SDValue Idx = SplitIndexingFromLoad(LD);
16197  SDValue Ops[] = {Val, Idx, Chain};
16198  return CombineTo(LD, Ops, 3);
16199  }
16200  return CombineTo(LD, Val, Chain);
16201  };
16202 
16203  if (!STCoversLD)
16204  return SDValue();
16205 
16206  // Memory as copy space (potentially masked).
16207  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
16208  // Simple case: Direct non-truncating forwarding
16209  if (LDType.getSizeInBits() == LdMemSize)
16210  return ReplaceLd(LD, ST->getValue(), Chain);
16211  // Can we model the truncate and extension with an and mask?
16212  if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
16213  !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
16214  // Mask to size of LDMemType
16215  auto Mask =
16217  StMemSize.getFixedSize()),
16218  SDLoc(ST), STType);
16219  auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
16220  return ReplaceLd(LD, Val, Chain);
16221  }
16222  }
16223 
16224  // TODO: Deal with nonzero offset.
16225  if (LD->getBasePtr().isUndef() || Offset != 0)
16226  return SDValue();
16227  // Model necessary truncations / extenstions.
16228  SDValue Val;
16229  // Truncate Value To Stored Memory Size.
16230  do {
16231  if (!getTruncatedStoreValue(ST, Val))
16232  continue;
16233  if (!isTypeLegal(LDMemType))
16234  continue;
16235  if (STMemType != LDMemType) {
16236  // TODO: Support vectors? This requires extract_subvector/bitcast.
16237  if (!STMemType.isVector() && !LDMemType.isVector() &&
16238  STMemType.isInteger() && LDMemType.isInteger())
16239  Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
16240  else
16241  continue;
16242  }
16243  if (!extendLoadedValueToExtension(LD, Val))
16244  continue;
16245  return ReplaceLd(LD, Val, Chain);
16246  } while (false);
16247 
16248  // On failure, cleanup dead nodes we may have created.
16249  if (Val->use_empty())
16250  deleteAndRecombine(Val.getNode());
16251  return SDValue();
16252 }
16253 
16254 SDValue DAGCombiner::visitLOAD(SDNode *N) {
16255  LoadSDNode *LD = cast<LoadSDNode>(N);
16256  SDValue Chain = LD->getChain();
16257  SDValue Ptr = LD->getBasePtr();
16258 
16259  // If load is not volatile and there are no uses of the loaded value (and
16260  // the updated indexed value in case of indexed loads), change uses of the
16261  // chain value into uses of the chain input (i.e. delete the dead load).
16262  // TODO: Allow this for unordered atomics (see D66309)
16263  if (LD->isSimple()) {
16264  if (N->getValueType(1) == MVT::Other) {
16265  // Unindexed loads.
16266  if (!N->hasAnyUseOfValue(0)) {
16267  // It's not safe to use the two value CombineTo variant here. e.g.
16268  // v1, chain2 = load chain1, loc
16269  // v2, chain3 = load chain2, loc
16270  // v3 = add v2, c
16271  // Now we replace use of chain2 with chain1. This makes the second load
16272  // isomorphic to the one we are deleting, and thus makes this load live.
16273  LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
16274  dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
16275  dbgs() << "\n");
16276  WorklistRemover DeadNodes(*this);
16277  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16278  AddUsersToWorklist(Chain.getNode());
16279  if (N->use_empty())
16280  deleteAndRecombine(N);
16281 
16282  return SDValue(N, 0); // Return N so it doesn't get rechecked!
16283  }
16284  } else {
16285  // Indexed loads.
16286  assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
16287 
16288  // If this load has an opaque TargetConstant offset, then we cannot split
16289  // the indexing into an add/sub directly (that TargetConstant may not be
16290  // valid for a different type of node, and we cannot convert an opaque
16291  // target constant into a regular constant).
16292  bool CanSplitIdx = canSplitIdx(LD);
16293 
16294  if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
16295  SDValue Undef = DAG.getUNDEF(N->getValueType(0));
16296  SDValue Index;
16297  if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
16298  Index = SplitIndexingFromLoad(LD);
16299  // Try to fold the base pointer arithmetic into subsequent loads and
16300  // stores.
16301  AddUsersToWorklist(N);
16302  } else
16303  Index = DAG.getUNDEF(N->getValueType(1));
16304  LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
16305  dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
16306  dbgs() << " and 2 other values\n");
16307  WorklistRemover DeadNodes(*this);
16310  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
16311  deleteAndRecombine(N);
16312  return SDValue(N, 0); // Return N so it doesn't get rechecked!
16313  }
16314  }
16315  }
16316 
16317  // If this load is directly stored, replace the load value with the stored
16318  // value.
16319  if (auto V = ForwardStoreValueToDirectLoad(LD))
16320  return V;
16321 
16322  // Try to infer better alignment information than the load already has.
16323  if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
16324  if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
16325  if (*Alignment > LD->getAlign() &&
16326  isAligned(*Alignment, LD->getSrcValueOffset())) {
16327  SDValue NewLoad = DAG.getExtLoad(
16328  LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
16329  LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
16330  LD->getMemOperand()->getFlags(), LD->getAAInfo());
16331  // NewLoad will always be N as we are only refining the alignment
16332  assert(NewLoad.getNode() == N);
16333  (void)NewLoad;
16334  }
16335  }
16336  }
16337 
16338  if (LD->isUnindexed()) {
16339  // Walk up chain skipping non-aliasing memory nodes.
16340  SDValue BetterChain = FindBetterChain(LD, Chain);
16341 
16342  // If there is a better chain.
16343  if (Chain != BetterChain) {
16344  SDValue ReplLoad;
16345 
16346  // Replace the chain to void dependency.
16347  if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
16348  ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
16349  BetterChain, Ptr, LD->getMemOperand());
16350  } else {
16351  ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
16352  LD->getValueType(0),
16353  BetterChain, Ptr, LD->getMemoryVT(),
16354  LD->getMemOperand());
16355  }
16356 
16357  // Create token factor to keep old chain connected.
16358  SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
16359  MVT::Other, Chain, ReplLoad.getValue(1));
16360 
16361  // Replace uses with load result and token factor
16362  return CombineTo(N, ReplLoad.getValue(0), Token);
16363  }
16364  }
16365 
16366  // Try transforming N to an indexed load.
16367  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16368  return SDValue(N, 0);
16369 
16370  // Try to slice up N to more direct loads if the slices are mapped to
16371  // different register banks or pairing can take place.
16372  if (SliceUpLoad(N))
16373  return SDValue(N, 0);
16374 
16375  return SDValue();
16376 }
16377 
16378 namespace {
16379 
16380 /// Helper structure used to slice a load in smaller loads.
16381 /// Basically a slice is obtained from the following sequence:
16382 /// Origin = load Ty1, Base
16383 /// Shift = srl Ty1 Origin, CstTy Amount
16384 /// Inst = trunc Shift to Ty2
16385 ///
16386 /// Then, it will be rewritten into:
16387 /// Slice = load SliceTy, Base + SliceOffset
16388 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16389 ///
16390 /// SliceTy is deduced from the number of bits that are actually used to
16391 /// build Inst.
16392 struct LoadedSlice {
16393  /// Helper structure used to compute the cost of a slice.
16394  struct Cost {
16395  /// Are we optimizing for code size.
16396  bool ForCodeSize = false;
16397 
16398  /// Various cost.
16399  unsigned Loads = 0;
16400  unsigned Truncates = 0;
16401  unsigned CrossRegisterBanksCopies = 0;
16402  unsigned ZExts = 0;
16403  unsigned Shift = 0;
16404 
16405  explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16406 
16407  /// Get the cost of one isolated slice.
16408  Cost(const LoadedSlice &LS, bool ForCodeSize)
16409  : ForCodeSize(ForCodeSize), Loads(1) {
16410  EVT TruncType = LS.Inst->getValueType(0);
16411  EVT LoadedType = LS.getLoadedType();
16412  if (TruncType != LoadedType &&
16413  !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16414  ZExts = 1;
16415  }
16416 
16417  /// Account for slicing gain in the current cost.
16418  /// Slicing provide a few gains like removing a shift or a
16419  /// truncate. This method allows to grow the cost of the original
16420  /// load with the gain from this slice.
16421  void addSliceGain(const LoadedSlice &LS) {
16422  // Each slice saves a truncate.
16423  const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16424  if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16425  LS.Inst->getValueType(0)))
16426  ++Truncates;
16427  // If there is a shift amount, this slice gets rid of it.
16428  if (LS.Shift)
16429  ++Shift;
16430  // If this slice can merge a cross register bank copy, account for it.
16431  if (LS.canMergeExpensiveCrossRegisterBankCopy())
16432  ++CrossRegisterBanksCopies;
16433  }
16434 
16435  Cost &operator+=(const Cost &RHS) {
16436  Loads += RHS.Loads;
16437  Truncates += RHS.Truncates;
16438  CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16439  ZExts += RHS.ZExts;
16440  Shift += RHS.Shift;
16441  return *this;
16442  }
16443 
16444  bool operator==(const Cost &RHS) const {
16445  return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16446  CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16447  ZExts == RHS.ZExts && Shift == RHS.Shift;
16448  }
16449 
16450  bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16451 
16452  bool operator<(const Cost &RHS) const {
16453  // Assume cross register banks copies are as expensive as loads.
16454  // FIXME: Do we want some more target hooks?
16455  unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16456  unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16457  // Unless we are optimizing for code size, consider the
16458  // expensive operation first.
16459  if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16460  return ExpensiveOpsLHS < ExpensiveOpsRHS;
16461  return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16462  (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16463  }
16464 
16465  bool operator>(const Cost &RHS) const { return RHS < *this; }
16466 
16467  bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16468 
16469  bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16470  };
16471 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction this slice was carved out of.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  /// Build a slice description. The all-default form produces an invalid
  /// slice (rejected by isLegal()).
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16489 
16490  /// Get the bits used in a chunk of bits \p BitWidth large.
16491  /// \return Result is \p BitWidth and has used bits set to 1 and
16492  /// not used bits set to 0.
16493  APInt getUsedBits() const {
16494  // Reproduce the trunc(lshr) sequence:
16495  // - Start from the truncated value.
16496  // - Zero extend to the desired bit width.
16497  // - Shift left.
16498  assert(Origin && "No original load to compare against.");
16499  unsigned BitWidth = Origin->getValueSizeInBits(0);
16500  assert(Inst && "This slice is not bound to an instruction");
16501  assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16502  "Extracted slice is bigger than the whole type!");
16503  APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16504  UsedBits.setAllBits();
16505  UsedBits = UsedBits.zext(BitWidth);
16506  UsedBits <<= Shift;
16507  return UsedBits;
16508  }
16509 
16510  /// Get the size of the slice to be loaded in bytes.
16511  unsigned getLoadedSize() const {
16512  unsigned SliceSize = getUsedBits().countPopulation();
16513  assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16514  return SliceSize / 8;
16515  }
16516 
16517  /// Get the type that will be loaded for this slice.
16518  /// Note: This may not be the final type for the slice.
16519  EVT getLoadedType() const {
16520  assert(DAG && "Missing context");
16521  LLVMContext &Ctxt = *DAG->getContext();
16522  return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16523  }
16524 
16525  /// Get the alignment of the load used for this slice.
16526  Align getAlign() const {
16527  Align Alignment = Origin->getAlign();
16528  uint64_t Offset = getOffsetFromBase();
16529  if (Offset != 0)
16530  Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16531  return Alignment;
16532  }
16533 
16534  /// Check if this slice can be rewritten with legal operations.
16535  bool isLegal() const {
16536  // An invalid slice is not legal.
16537  if (!Origin || !Inst || !DAG)
16538  return false;
16539 
16540  // Offsets are for indexed load only, we do not handle that.
16541  if (!Origin->getOffset().isUndef())
16542  return false;
16543 
16544  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16545 
16546  // Check that the type is legal.
16547  EVT SliceType = getLoadedType();
16548  if (!TLI.isTypeLegal(SliceType))
16549  return false;
16550 
16551  // Check that the load is legal for this type.
16552  if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16553  return false;
16554 
16555  // Check that the offset can be computed.
16556  // 1. Check its type.
16557  EVT PtrType = Origin->getBasePtr().getValueType();
16558  if (PtrType == MVT::Untyped || PtrType.isExtended())
16559  return false;
16560 
16561  // 2. Check that it fits in the immediate.
16562  if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16563  return false;
16564 
16565  // 3. Check that the computation is legal.
16566  if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16567  return false;
16568 
16569  // Check that the zext is legal if it needs one.
16570  EVT TruncateType = Inst->getValueType(0);
16571  if (TruncateType != SliceType &&
16572  !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16573  return false;
16574 
16575  return true;
16576  }
16577 
16578  /// Get the offset in bytes of this slice in the original chunk of
16579  /// bits.
16580  /// \pre DAG != nullptr.
16581  uint64_t getOffsetFromBase() const {
16582  assert(DAG && "Missing context.");
16583  bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16584  assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16585  uint64_t Offset = Shift / 8;
16586  unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16587  assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16588  "The size of the original loaded type is not a multiple of a"
16589  " byte.");
16590  // If Offset is bigger than TySizeInBytes, it means we are loading all
16591  // zeros. This should have been optimized before in the process.
16592  assert(TySizeInBytes > Offset &&
16593  "Invalid shift amount for given loaded size");
16594  if (IsBigEndian)
16595  Offset = TySizeInBytes - Offset - getLoadedSize();
16596  return Offset;
16597  }
16598 
16599  /// Generate the sequence of instructions to load the slice
16600  /// represented by this object and redirect the uses of this slice to
16601  /// this new sequence of instructions.
16602  /// \pre this->Inst && this->Origin are valid Instructions and this
16603  /// object passed the legal check: LoadedSlice::isLegal returned true.
16604  /// \return The last instruction of the sequence used to load the slice.
16605  SDValue loadSlice() const {
16606  assert(Inst && Origin && "Unable to replace a non-existing slice.");
16607  const SDValue &OldBaseAddr = Origin->getBasePtr();
16608  SDValue BaseAddr = OldBaseAddr;
16609  // Get the offset in that chunk of bytes w.r.t. the endianness.
16610  int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16611  assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16612  if (Offset) {
16613  // BaseAddr = BaseAddr + Offset.
16614  EVT ArithType = BaseAddr.getValueType();
16615  SDLoc DL(Origin);
16616  BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16617  DAG->getConstant(Offset, DL, ArithType));
16618  }
16619 
16620  // Create the type of the loaded slice according to its size.
16621  EVT SliceType = getLoadedType();
16622 
16623  // Create the load for the slice.
16624  SDValue LastInst =
16625  DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16627  Origin->getMemOperand()->getFlags());
16628  // If the final type is not the same as the loaded type, this means that
16629  // we have to pad with zero. Create a zero extend for that.
16630  EVT FinalType = Inst->getValueType(0);
16631  if (SliceType != FinalType)
16632  LastInst =
16633  DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16634  return LastInst;
16635  }
16636 
16637  /// Check if this slice can be merged with an expensive cross register
16638  /// bank copy. E.g.,
16639  /// i = load i32
16640  /// f = bitcast i32 i to float
16641  bool canMergeExpensiveCrossRegisterBankCopy() const {
16642  if (!Inst || !Inst->hasOneUse())
16643  return false;
16644  SDNode *Use = *Inst->use_begin();
16645  if (Use->getOpcode() != ISD::BITCAST)
16646  return false;
16647  assert(DAG && "Missing context");
16648  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16649  EVT ResVT = Use->getValueType(0);
16650  const TargetRegisterClass *ResRC =
16651  TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16652  const TargetRegisterClass *ArgRC =
16653  TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16654  Use->getOperand(0)->isDivergent());
16655  if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16656  return false;
16657 
16658  // At this point, we know that we perform a cross-register-bank copy.
16659  // Check if it is expensive.
16661  // Assume bitcasts are cheap, unless both register classes do not
16662  // explicitly share a common sub class.
16663  if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16664  return false;
16665 
16666  // Check if it will be merged with the load.
16667  // 1. Check the alignment / fast memory access constraint.
16668  bool IsFast = false;
16669  if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16670  Origin->getAddressSpace(), getAlign(),
16671  Origin->getMemOperand()->getFlags(), &IsFast) ||
16672  !IsFast)
16673  return false;
16674 
16675  // 2. Check that the load is a legal operation for that type.
16676  if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16677  return false;
16678 
16679  // 3. Check that we do not have a zext in the way.
16680  if (Inst->getValueType(0) != getLoadedType())
16681  return false;
16682 
16683  return true;
16684  }
16685 };
16686 
16687 } // end anonymous namespace
16688 
16689 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16690 /// \p UsedBits looks like 0..0 1..1 0..0.
16691 static bool areUsedBitsDense(const APInt &UsedBits) {
16692  // If all the bits are one, this is dense!
16693  if (UsedBits.isAllOnes())
16694  return true;
16695 
16696  // Get rid of the unused bits on the right.
16697  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16698  // Get rid of the unused bits on the left.
16699  if (NarrowedUsedBits.countLeadingZeros())
16700  NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16701  // Check that the chunk of bits is completely used.
16702  return NarrowedUsedBits.isAllOnes();
16703 }
16704 
16705 /// Check whether or not \p First and \p Second are next to each other
16706 /// in memory. This means that there is no hole between the bits loaded
16707 /// by \p First and the bits loaded by \p Second.
16708 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16709  const LoadedSlice &Second) {
16710  assert(First.Origin == Second.Origin && First.Origin &&
16711  "Unable to match different memory origins.");
16712  APInt UsedBits = First.getUsedBits();
16713  assert((UsedBits & Second.getUsedBits()) == 0 &&
16714  "Slices are not supposed to overlap.");
16715  UsedBits |= Second.getUsedBits();
16716  return areUsedBitsDense(UsedBits);
16717 }
16718 
16719 /// Adjust the \p GlobalLSCost according to the target
16720 /// paring capabilities and the layout of the slices.
16721 /// \pre \p GlobalLSCost should account for at least as many loads as
16722 /// there is in the slices in \p LoadedSlices.
16724  LoadedSlice::Cost &GlobalLSCost) {
16725  unsigned NumberOfSlices = LoadedSlices.size();
16726  // If there is less than 2 elements, no pairing is possible.
16727  if (NumberOfSlices < 2)
16728  return;
16729 
16730  // Sort the slices so that elements that are likely to be next to each
16731  // other in memory are next to each other in the list.
16732  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16733  assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16734  return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16735  });
16736  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16737  // First (resp. Second) is the first (resp. Second) potentially candidate
16738  // to be placed in a paired load.
16739  const LoadedSlice *First = nullptr;
16740  const LoadedSlice *Second = nullptr;
16741  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16742  // Set the beginning of the pair.
16743  First = Second) {
16744  Second = &LoadedSlices[CurrSlice];
16745 
16746  // If First is NULL, it means we start a new pair.
16747  // Get to the next slice.
16748  if (!First)
16749  continue;
16750 
16751  EVT LoadedType = First->getLoadedType();
16752 
16753  // If the types of the slices are different, we cannot pair them.
16754  if (LoadedType != Second->getLoadedType())
16755  continue;
16756 
16757  // Check if the target supplies paired loads for this type.
16758  Align RequiredAlignment;
16759  if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16760  // move to the next pair, this type is hopeless.
16761  Second = nullptr;
16762  continue;
16763  }
16764  // Check if we meet the alignment requirement.
16765  if (First->getAlign() < RequiredAlignment)
16766  continue;
16767 
16768  // Check that both loads are next to each other in memory.
16769  if (!areSlicesNextToEachOther(*First, *Second))
16770  continue;
16771 
16772  assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16773  --GlobalLSCost.Loads;
16774  // Move to the next pair.
16775  Second = nullptr;
16776  }
16777 }
16778 
16779 /// Check the profitability of all involved LoadedSlice.
16780 /// Currently, it is considered profitable if there is exactly two
16781 /// involved slices (1) which are (2) next to each other in memory, and
16782 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16783 ///
16784 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
16785 /// the elements themselves.
16786 ///
16787 /// FIXME: When the cost model will be mature enough, we can relax
16788 /// constraints (1) and (2).
16790  const APInt &UsedBits, bool ForCodeSize) {
16791  unsigned NumberOfSlices = LoadedSlices.size();
16792  if (StressLoadSlicing)
16793  return NumberOfSlices > 1;
16794 
16795  // Check (1).
16796  if (NumberOfSlices != 2)
16797  return false;
16798 
16799  // Check (2).
16800  if (!areUsedBitsDense(UsedBits))
16801  return false;
16802 
16803  // Check (3).
16804  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16805  // The original code has one big load.
16806  OrigCost.Loads = 1;
16807  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16808  const LoadedSlice &LS = LoadedSlices[CurrSlice];
16809  // Accumulate the cost of all the slices.
16810  LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16811  GlobalSlicingCost += SliceCost;
16812 
16813  // Account as cost in the original configuration the gain obtained
16814  // with the current slices.
16815  OrigCost.addSliceGain(LS);
16816  }
16817 
16818  // If the target supports paired load, adjust the cost accordingly.
16819  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16820  return OrigCost > GlobalSlicingCost;
16821 }
16822 
16823 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
16824 /// operations, split it in the various pieces being extracted.
16825 ///
16826 /// This sort of thing is introduced by SROA.
16827 /// This slicing takes care not to insert overlapping loads.
16828 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
16829 bool DAGCombiner::SliceUpLoad(SDNode *N) {
16830  if (Level < AfterLegalizeDAG)
16831  return false;
16832 
16833  LoadSDNode *LD = cast<LoadSDNode>(N);
16834  if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16835  !LD->getValueType(0).isInteger())
16836  return false;
16837 
16838  // The algorithm to split up a load of a scalable vector into individual
16839  // elements currently requires knowing the length of the loaded type,
16840  // so will need adjusting to work on scalable vectors.
16841  if (LD->getValueType(0).isScalableVector())
16842  return false;
16843 
16844  // Keep track of already used bits to detect overlapping values.
16845  // In that case, we will just abort the transformation.
16846  APInt UsedBits(LD->getValueSizeInBits(0), 0);
16847 
16848  SmallVector<LoadedSlice, 4> LoadedSlices;
16849 
16850  // Check if this load is used as several smaller chunks of bits.
16851  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16852  // of computation for each trunc.
16853  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16854  UI != UIEnd; ++UI) {
16855  // Skip the uses of the chain.
16856  if (UI.getUse().getResNo() != 0)
16857  continue;
16858 
16859  SDNode *User = *UI;
16860  unsigned Shift = 0;
16861 
16862  // Check if this is a trunc(lshr).
16863  if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16864  isa<ConstantSDNode>(User->getOperand(1))) {
16865  Shift = User->getConstantOperandVal(1);
16866  User = *User->use_begin();
16867  }
16868 
16869  // At this point, User is a Truncate, iff we encountered, trunc or
16870  // trunc(lshr).
16871  if (User->getOpcode() != ISD::TRUNCATE)
16872  return false;
16873 
16874  // The width of the type must be a power of 2 and greater than 8-bits.
16875  // Otherwise the load cannot be represented in LLVM IR.
16876  // Moreover, if we shifted with a non-8-bits multiple, the slice
16877  // will be across several bytes. We do not support that.
16878  unsigned Width = User->getValueSizeInBits(0);
16879  if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16880  return false;
16881 
16882  // Build the slice for this chain of computations.
16883  LoadedSlice LS(User, LD, Shift, &DAG);
16884  APInt CurrentUsedBits = LS.getUsedBits();
16885 
16886  // Check if this slice overlaps with another.
16887  if ((CurrentUsedBits & UsedBits) != 0)
16888  return false;
16889  // Update the bits used globally.
16890  UsedBits |= CurrentUsedBits;
16891 
16892  // Check if the new slice would be legal.
16893  if (!LS.isLegal())
16894  return false;
16895 
16896  // Record the slice.
16897  LoadedSlices.push_back(LS);
16898  }
16899 
16900  // Abort slicing if it does not seem to be profitable.
16901  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16902  return false;
16903 
16904  ++SlicedLoads;
16905 
16906  // Rewrite each chain to use an independent load.
16907  // By construction, each chain can be represented by a unique load.
16908 
16909  // Prepare the argument for the new token factor for all the slices.
16910  SmallVector<SDValue, 8> ArgChains;
16911  for (const LoadedSlice &LS : LoadedSlices) {
16912  SDValue SliceInst = LS.loadSlice();
16913  CombineTo(LS.Inst, SliceInst, true);
16914  if (SliceInst.getOpcode() != ISD::LOAD)
16915  SliceInst = SliceInst.getOperand(0);
16916  assert(SliceInst->getOpcode() == ISD::LOAD &&
16917  "It takes more than a zext to get to the loaded slice!!");
16918  ArgChains.push_back(SliceInst.getValue(1));
16919  }
16920 
16922  ArgChains);
16923  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16924  AddToWorklist(Chain.getNode());
16925  return true;
16926 }
16927 
16928 /// Check to see if V is (and load (ptr), imm), where the load is having
16929 /// specific bytes cleared out. If so, return the byte size being masked out
16930 /// and the shift amount.
16931 static std::pair<unsigned, unsigned>
16933  std::pair<unsigned, unsigned> Result(0, 0);
16934 
16935  // Check for the structure we're looking for.
16936  if (V->getOpcode() != ISD::AND ||
16937  !isa<ConstantSDNode>(V->getOperand(1)) ||
16939  return Result;
16940 
16941  // Check the chain and pointer.
16942  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16943  if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16944 
16945  // This only handles simple types.
16946  if (V.getValueType() != MVT::i16 &&
16947  V.getValueType() != MVT::i32 &&
16948  V.getValueType() != MVT::i64)
16949  return Result;
16950 
16951  // Check the constant mask. Invert it so that the bits being masked out are
16952  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
16953  // follow the sign bit for uniformity.
16954  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16955  unsigned NotMaskLZ = countLeadingZeros(NotMask);
16956  if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16957  unsigned NotMaskTZ = countTrailingZeros(NotMask);
16958  if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16959  if (NotMaskLZ == 64) return Result; // All zero mask.
16960 
16961  // See if we have a continuous run of bits. If so, we have 0*1+0*
16962  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16963  return Result;
16964 
16965  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16966  if (V.getValueType() != MVT::i64 && NotMaskLZ)
16967  NotMaskLZ -= 64-V.getValueSizeInBits();
16968 
16969  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16970  switch (MaskedBytes) {
16971  case 1:
16972  case 2:
16973  case 4: break;
16974  default: return Result; // All one mask, or 5-byte mask.
16975  }
16976 
16977  // Verify that the first bit starts at a multiple of mask so that the access
16978  // is aligned the same as the access width.
16979  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16980 
16981  // For narrowing to be valid, it must be the case that the load the
16982  // immediately preceding memory operation before the store.
16983  if (LD == Chain.getNode())
16984  ; // ok.
16985  else if (Chain->getOpcode() == ISD::TokenFactor &&
16986  SDValue(LD, 1).hasOneUse()) {
16987  // LD has only 1 chain use so they are no indirect dependencies.
16988  if (!LD->isOperandOf(Chain.getNode()))
16989  return Result;
16990  } else
16991  return Result; // Fail.
16992 
16993  Result.first = MaskedBytes;
16994  Result.second = NotMaskTZ/8;
16995  return Result;
16996 }
16997 
16998 /// Check to see if IVal is something that provides a value as specified by
16999 /// MaskInfo. If so, replace the specified store with a narrower store of
17000 /// truncated IVal.
17001 static SDValue
17002 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
17003  SDValue IVal, StoreSDNode *St,
17004  DAGCombiner *DC) {
17005  unsigned NumBytes = MaskInfo.first;
17006  unsigned ByteShift = MaskInfo.second;
17007  SelectionDAG &DAG = DC->getDAG();
17008 
17009  // Check to see if IVal is all zeros in the part being masked in by the 'or'
17010  // that uses this. If not, this is not a replacement.
17011  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
17012  ByteShift*8, (ByteShift+NumBytes)*8);
17013  if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
17014 
17015  // Check that it is legal on the target to do this. It is legal if the new
17016  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
17017  // legalization (and the target doesn't explicitly think this is a bad idea).
17018  MVT VT = MVT::getIntegerVT(NumBytes * 8);
17019  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17020  if (!DC->isTypeLegal(VT))
17021  return SDValue();
17022  if (St->getMemOperand() &&
17023  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
17024  *St->getMemOperand()))
17025  return SDValue();
17026 
17027  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
17028  // shifted by ByteShift and truncated down to NumBytes.
17029  if (ByteShift) {
17030  SDLoc DL(IVal);
17031  IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
17032  DAG.getConstant(ByteShift*8, DL,
17033  DC->getShiftAmountTy(IVal.getValueType())));
17034  }
17035 
17036  // Figure out the offset for the store and the alignment of the access.
17037  unsigned StOffset;
17038  if (DAG.getDataLayout().isLittleEndian())
17039  StOffset = ByteShift;
17040  else
17041  StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
17042 
17043  SDValue Ptr = St->getBasePtr();
17044  if (StOffset) {
17045  SDLoc DL(IVal);
17046  Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
17047  }
17048 
17049  // Truncate down to the new size.
17050  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
17051 
17052  ++OpsNarrowed;
17053  return DAG
17054  .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
17055  St->getPointerInfo().getWithOffset(StOffset),
17056  St->getOriginalAlign());
17057 }
17058 
17059 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
17060 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
17061 /// narrowing the load and store if it would end up being a win for performance
17062 /// or code size.
17063 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
17064  StoreSDNode *ST = cast<StoreSDNode>(N);
17065  if (!ST->isSimple())
17066  return SDValue();
17067 
17068  SDValue Chain = ST->getChain();
17069  SDValue Value = ST->getValue();
17070  SDValue Ptr = ST->getBasePtr();
17071  EVT VT = Value.getValueType();
17072 
17073  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
17074  return SDValue();
17075 
17076  unsigned Opc = Value.getOpcode();
17077 
17078  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
17079  // is a byte mask indicating a consecutive number of bytes, check to see if
17080  // Y is known to provide just those bytes. If so, we try to replace the
17081  // load + replace + store sequence with a single (narrower) store, which makes
17082  // the load dead.
17084  std::pair<unsigned, unsigned> MaskedLoad;
17085  MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
17086  if (MaskedLoad.first)
17087  if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17088  Value.getOperand(1), ST,this))
17089  return NewST;
17090 
17091  // Or is commutative, so try swapping X and Y.
17092  MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
17093  if (MaskedLoad.first)
17094  if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17095  Value.getOperand(0), ST,this))
17096  return NewST;
17097  }
17098 
17100  return SDValue();
17101 
17102  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
17103  Value.getOperand(1).getOpcode() != ISD::Constant)
17104  return SDValue();
17105 
17106  SDValue N0 = Value.getOperand(0);
17107  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17108  Chain == SDValue(N0.getNode(), 1)) {
17109  LoadSDNode *LD = cast<LoadSDNode>(N0);
17110  if (LD->getBasePtr() != Ptr ||
17111  LD->getPointerInfo().getAddrSpace() !=
17112  ST->getPointerInfo().getAddrSpace())
17113  return SDValue();
17114 
17115  // Find the type to narrow it the load / op / store to.
17116  SDValue N1 = Value.getOperand(1);
17117  unsigned BitWidth = N1.getValueSizeInBits();
17118  APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
17119  if (Opc == ISD::AND)
17120  Imm ^= APInt::getAllOnes(BitWidth);
17121  if (Imm == 0 || Imm.isAllOnes())
17122  return SDValue();
17123  unsigned ShAmt = Imm.countTrailingZeros();
17124  unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
17125  unsigned NewBW = NextPowerOf2(MSB - ShAmt);
17126  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17127  // The narrowing should be profitable, the load/store operation should be
17128  // legal (or custom) and the store size should be equal to the NewVT width.
17129  while (NewBW < BitWidth &&
17130  (NewVT.getStoreSizeInBits() != NewBW ||
17131  !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17132  !TLI.isNarrowingProfitable(VT, NewVT))) {
17133  NewBW = NextPowerOf2(NewBW);
17134  NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17135  }
17136  if (NewBW >= BitWidth)
17137  return SDValue();
17138 
17139  // If the lsb changed does not start at the type bitwidth boundary,
17140  // start at the previous one.
17141  if (ShAmt % NewBW)
17142  ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
17144  std::min(BitWidth, ShAmt + NewBW));
17145  if ((Imm & Mask) == Imm) {
17146  APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17147  if (Opc == ISD::AND)
17148  NewImm ^= APInt::getAllOnes(NewBW);
17149  uint64_t PtrOff = ShAmt / 8;
17150  // For big endian targets, we need to adjust the offset to the pointer to
17151  // load the correct bytes.
17152  if (DAG.getDataLayout().isBigEndian())
17153  PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17154 
17155  bool IsFast = false;
17156  Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17157  if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17158  LD->getAddressSpace(), NewAlign,
17159  LD->getMemOperand()->getFlags(), &IsFast) ||
17160  !IsFast)
17161  return SDValue();
17162 
17163  SDValue NewPtr =
17164  DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
17165  SDValue NewLD =
17166  DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17167  LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17168  LD->getMemOperand()->getFlags(), LD->getAAInfo());
17169  SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17170  DAG.getConstant(NewImm, SDLoc(Value),
17171  NewVT));
17172  SDValue NewST =
17173  DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17174  ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17175 
17176  AddToWorklist(NewPtr.getNode());
17177  AddToWorklist(NewLD.getNode());
17178  AddToWorklist(NewVal.getNode());
17179  WorklistRemover DeadNodes(*this);
17180  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17181  ++OpsNarrowed;
17182  return NewST;
17183  }
17184  }
17185 
17186  return SDValue();
17187 }
17188 
17189 /// For a given floating point load / store pair, if the load value isn't used
17190 /// by any other operations, then consider transforming the pair to integer
17191 /// load / store operations if the target deems the transformation profitable.
17192 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17193  StoreSDNode *ST = cast<StoreSDNode>(N);
17194  SDValue Value = ST->getValue();
17195  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17196  Value.hasOneUse()) {
17197  LoadSDNode *LD = cast<LoadSDNode>(Value);
17198  EVT VT = LD->getMemoryVT();
17199  if (!VT.isFloatingPoint() ||
17200  VT != ST->getMemoryVT() ||
17201  LD->isNonTemporal() ||
17202  ST->isNonTemporal() ||
17203  LD->getPointerInfo().getAddrSpace() != 0 ||
17204  ST->getPointerInfo().getAddrSpace() != 0)
17205  return SDValue();
17206 
17207  TypeSize VTSize = VT.getSizeInBits();
17208 
17209  // We don't know the size of scalable types at compile time so we cannot
17210  // create an integer of the equivalent size.
17211  if (VTSize.isScalable())
17212  return SDValue();
17213 
17214  bool FastLD = false, FastST = false;
17215  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
17216  if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17217  !TLI.isOperationLegal(ISD::STORE, IntVT) ||
17220  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17221  *LD->getMemOperand(), &FastLD) ||
17222  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17223  *ST->getMemOperand(), &FastST) ||
17224  !FastLD || !FastST)
17225  return SDValue();
17226 
17227  SDValue NewLD =
17228  DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17229  LD->getPointerInfo(), LD->getAlign());
17230 
17231  SDValue NewST =
17232  DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17233  ST->getPointerInfo(), ST->getAlign());
17234 
17235  AddToWorklist(NewLD.getNode());
17236  AddToWorklist(NewST.getNode());
17237  WorklistRemover DeadNodes(*this);
17238  DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17239  ++LdStFP2Int;
17240  return NewST;
17241  }
17242 
17243  return SDValue();
17244 }
17245 
17246 // This is a helper function for visitMUL to check the profitability
17247 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17248 // MulNode is the original multiply, AddNode is (add x, c1),
17249 // and ConstNode is c2.
17250 //
17251 // If the (add x, c1) has multiple uses, we could increase
17252 // the number of adds if we make this transformation.
17253 // It would only be worth doing this if we can remove a
17254 // multiply in the process. Check for that here.
17255 // To illustrate:
17256 // (A + c1) * c3
17257 // (A + c2) * c3
17258 // We're checking for cases where we have common "c3 * A" expressions.
17259 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
17260  SDValue &AddNode,
17261  SDValue &ConstNode) {
17262  APInt Val;
17263 
17264  // If the add only has one use, and the target thinks the folding is
17265  // profitable or does not lead to worse code, this would be OK to do.
17266  if (AddNode.getNode()->hasOneUse() &&
17267  TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
17268  return true;
17269 
17270  // Walk all the users of the constant with which we're multiplying.
17271  for (SDNode *Use : ConstNode->uses()) {
17272  if (Use == MulNode) // This use is the one we're on right now. Skip it.
17273  continue;
17274 
17275  if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17276  SDNode *OtherOp;
17277  SDNode *MulVar = AddNode.getOperand(0).getNode();
17278 
17279  // OtherOp is what we're multiplying against the constant.
17280  if (Use->getOperand(0) == ConstNode)
17281  OtherOp = Use->getOperand(1).getNode();
17282  else
17283  OtherOp = Use->getOperand(0).getNode();
17284 
17285  // Check to see if multiply is with the same operand of our "add".
17286  //
17287  // ConstNode = CONST
17288  // Use = ConstNode * A <-- visiting Use. OtherOp is A.
17289  // ...
17290  // AddNode = (A + c1) <-- MulVar is A.
17291  // = AddNode * ConstNode <-- current visiting instruction.
17292  //
17293  // If we make this transformation, we will have a common
17294  // multiply (ConstNode * A) that we can save.
17295  if (OtherOp == MulVar)
17296  return true;
17297 
17298  // Now check to see if a future expansion will give us a common
17299  // multiply.
17300  //
17301  // ConstNode = CONST
17302  // AddNode = (A + c1)
17303  // ... = AddNode * ConstNode <-- current visiting instruction.
17304  // ...
17305  // OtherOp = (A + c2)
17306  // Use = OtherOp * ConstNode <-- visiting Use.
17307  //
17308  // If we make this transformation, we will have a common
17309  // multiply (CONST * A) after we also do the same transformation
17310  // to the "t2" instruction.
17311  if (OtherOp->getOpcode() == ISD::ADD &&
17313  OtherOp->getOperand(0).getNode() == MulVar)
17314  return true;
17315  }
17316  }
17317 
17318  // Didn't find a case where this would be profitable.
17319  return false;
17320 }
17321 
17322 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17323  unsigned NumStores) {
17324  SmallVector<SDValue, 8> Chains;
17326  SDLoc StoreDL(StoreNodes[0].MemNode);
17327 
17328  for (unsigned i = 0; i < NumStores; ++i) {
17329  Visited.insert(StoreNodes[i].MemNode);
17330  }
17331 
17332  // don't include nodes that are children or repeated nodes.
17333  for (unsigned i = 0; i < NumStores; ++i) {
17334  if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17335  Chains.push_back(StoreNodes[i].MemNode->getChain());
17336  }
17337 
17338  assert(Chains.size() > 0 && "Chain should have generated a chain");
17339  return DAG.getTokenFactor(StoreDL, Chains);
17340 }
17341 
17342 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17343  SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17344  bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17345  // Make sure we have something to merge.
17346  if (NumStores < 2)
17347  return false;
17348 
17349  assert((!UseTrunc || !UseVector) &&
17350  "This optimization cannot emit a vector truncating store");
17351 
17352  // The latest Node in the DAG.
17353  SDLoc DL(StoreNodes[0].MemNode);
17354 
17355  TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17356  unsigned SizeInBits = NumStores * ElementSizeBits;
17357  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17358 
17360  AAMDNodes AAInfo;
17361  for (unsigned I = 0; I != NumStores; ++I) {
17362  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17363  if (!Flags) {
17364  Flags = St->getMemOperand()->getFlags();
17365  AAInfo = St->getAAInfo();
17366  continue;
17367  }
17368  // Skip merging if there's an inconsistent flag.
17369  if (Flags != St->getMemOperand()->getFlags())
17370  return false;
17371  // Concatenate AA metadata.
17372  AAInfo = AAInfo.concat(St->getAAInfo());
17373  }
17374 
17375  EVT StoreTy;
17376  if (UseVector) {
17377  unsigned Elts = NumStores * NumMemElts;
17378  // Get the type for the merged vector store.
17379  StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17380  } else
17381  StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17382 
17383  SDValue StoredVal;
17384  if (UseVector) {
17385  if (IsConstantSrc) {
17386  SmallVector<SDValue, 8> BuildVector;
17387  for (unsigned I = 0; I != NumStores; ++I) {
17388  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17389  SDValue Val = St->getValue();
17390  // If constant is of the wrong type, convert it now.
17391  if (MemVT != Val.getValueType()) {
17392  Val = peekThroughBitcasts(Val);
17393  // Deal with constants of wrong size.
17394  if (ElementSizeBits != Val.getValueSizeInBits()) {
17395  EVT IntMemVT =
17396  EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17397  if (isa<ConstantFPSDNode>(Val)) {
17398  // Not clear how to truncate FP values.
17399  return false;
17400  } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
17401  Val = DAG.getConstant(C->getAPIntValue()
17402  .zextOrTrunc(Val.getValueSizeInBits())
17403  .zextOrTrunc(ElementSizeBits),
17404  SDLoc(C), IntMemVT);
17405  }
17406  // Make sure correctly size type is the correct type.
17407  Val = DAG.getBitcast(MemVT, Val);
17408  }
17409  BuildVector.push_back(Val);
17410  }
17411  StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17413  DL, StoreTy, BuildVector);
17414  } else {
17416  for (unsigned i = 0; i < NumStores; ++i) {
17417  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17418  SDValue Val = peekThroughBitcasts(St->getValue());
17419  // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17420  // type MemVT. If the underlying value is not the correct
17421  // type, but it is an extraction of an appropriate vector we
17422  // can recast Val to be of the correct type. This may require
17423  // converting between EXTRACT_VECTOR_ELT and
17424  // EXTRACT_SUBVECTOR.
17425  if ((MemVT != Val.getValueType()) &&
17426  (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17427  Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17428  EVT MemVTScalarTy = MemVT.getScalarType();
17429  // We may need to add a bitcast here to get types to line up.
17430  if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17431  Val = DAG.getBitcast(MemVT, Val);
17432  } else {
17433  unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17435  SDValue Vec = Val.getOperand(0);
17436  SDValue Idx = Val.getOperand(1);
17437  Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17438  }
17439  }
17440  Ops.push_back(Val);
17441  }
17442 
17443  // Build the extracted vector elements back into a vector.
17444  StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17446  DL, StoreTy, Ops);
17447  }
17448  } else {
17449  // We should always use a vector store when merging extracted vector
17450  // elements, so this path implies a store of constants.
17451  assert(IsConstantSrc && "Merged vector elements should use vector store");
17452 
17453  APInt StoreInt(SizeInBits, 0);
17454 
17455  // Construct a single integer constant which is made of the smaller
17456  // constant inputs.
17457  bool IsLE = DAG.getDataLayout().isLittleEndian();
17458  for (unsigned i = 0; i < NumStores; ++i) {
17459  unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17460  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17461 
17462  SDValue Val = St->getValue();
17463  Val = peekThroughBitcasts(Val);
17464  StoreInt <<= ElementSizeBits;
17465  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17466  StoreInt |= C->getAPIntValue()
17467  .zextOrTrunc(ElementSizeBits)
17468  .zextOrTrunc(SizeInBits);
17469  } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17470  StoreInt |= C->getValueAPF()
17471  .bitcastToAPInt()
17472  .zextOrTrunc(ElementSizeBits)
17473  .zextOrTrunc(SizeInBits);
17474  // If fp truncation is necessary give up for now.
17475  if (MemVT.getSizeInBits() != ElementSizeBits)
17476  return false;
17477  } else {
17478  llvm_unreachable("Invalid constant element type");
17479  }
17480  }
17481 
17482  // Create the new Load and Store operations.
17483  StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17484  }
17485 
17486  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17487  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17488 
17489  // make sure we use trunc store if it's necessary to be legal.
17490  SDValue NewStore;
17491  if (!UseTrunc) {
17492  NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17493  FirstInChain->getPointerInfo(),
17494  FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17495  } else { // Must be realized as a trunc store
17496  EVT LegalizedStoredValTy =
17497  TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17498  unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17499  ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17500  SDValue ExtendedStoreVal =
17501  DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17502  LegalizedStoredValTy);
17503  NewStore = DAG.getTruncStore(
17504  NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17505  FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17506  FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17507  }
17508 
17509  // Replace all merged stores with the new store.
17510  for (unsigned i = 0; i < NumStores; ++i)
17511  CombineTo(StoreNodes[i].MemNode, NewStore);
17512 
17513  AddToWorklist(NewChain.getNode());
17514  return true;
17515 }
17516 
17517 void DAGCombiner::getStoreMergeCandidates(
17518  StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17519  SDNode *&RootNode) {
17520  // This holds the base pointer, index, and the offset in bytes from the base
17521  // pointer. We must have a base and an offset. Do not handle stores to undef
17522  // base pointers.
17524  if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17525  return;
17526 
17527  SDValue Val = peekThroughBitcasts(St->getValue());
17528  StoreSource StoreSrc = getStoreSource(Val);
17529  assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17530 
17531  // Match on loadbaseptr if relevant.
17532  EVT MemVT = St->getMemoryVT();
17533  BaseIndexOffset LBasePtr;
17534  EVT LoadVT;
17535  if (StoreSrc == StoreSource::Load) {
17536  auto *Ld = cast<LoadSDNode>(Val);
17537  LBasePtr = BaseIndexOffset::match(Ld, DAG);
17538  LoadVT = Ld->getMemoryVT();
17539  // Load and store should be the same type.
17540  if (MemVT != LoadVT)
17541  return;
17542  // Loads must only have one use.
17543  if (!Ld->hasNUsesOfValue(1, 0))
17544  return;
17545  // The memory operands must not be volatile/indexed/atomic.
17546  // TODO: May be able to relax for unordered atomics (see D66309)
17547  if (!Ld->isSimple() || Ld->isIndexed())
17548  return;
17549  }
17550  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17551  int64_t &Offset) -> bool {
17552  // The memory operands must not be volatile/indexed/atomic.
17553  // TODO: May be able to relax for unordered atomics (see D66309)
17554  if (!Other->isSimple() || Other->isIndexed())
17555  return false;
17556  // Don't mix temporal stores with non-temporal stores.
17557  if (St->isNonTemporal() != Other->isNonTemporal())
17558  return false;
17559  SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17560  // Allow merging constants of different types as integers.
17561  bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17562  : Other->getMemoryVT() != MemVT;
17563  switch (StoreSrc) {
17564  case StoreSource::Load: {
17565  if (NoTypeMatch)
17566  return false;
17567  // The Load's Base Ptr must also match.
17568  auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17569  if (!OtherLd)
17570  return false;
17571  BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17572  if (LoadVT != OtherLd->getMemoryVT())
17573  return false;
17574  // Loads must only have one use.
17575  if (!OtherLd->hasNUsesOfValue(1, 0))
17576  return false;
17577  // The memory operands must not be volatile/indexed/atomic.
17578  // TODO: May be able to relax for unordered atomics (see D66309)
17579  if (!OtherLd->isSimple() || OtherLd->isIndexed())
17580  return false;
17581  // Don't mix temporal loads with non-temporal loads.
17582  if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17583  return false;
17584  if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17585  return false;
17586  break;
17587  }
17588  case StoreSource::Constant:
17589  if (NoTypeMatch)
17590  return false;
17591  if (!isIntOrFPConstant(OtherBC))
17592  return false;
17593  break;
17594  case StoreSource::Extract:
17595  // Do not merge truncated stores here.
17596  if (Other->isTruncatingStore())
17597  return false;
17598  if (!MemVT.bitsEq(OtherBC.getValueType()))
17599  return false;
17600  if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17601  OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17602  return false;
17603  break;
17604  default:
17605  llvm_unreachable("Unhandled store source for merging");
17606  }
17607  Ptr = BaseIndexOffset::match(Other, DAG);
17608  return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17609  };
17610 
17611  // Check if the pair of StoreNode and the RootNode already bail out many
17612  // times which is over the limit in dependence check.
17613  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17614  SDNode *RootNode) -> bool {
17615  auto RootCount = StoreRootCountMap.find(StoreNode);
17616  return RootCount != StoreRootCountMap.end() &&
17617  RootCount->second.first == RootNode &&
17618  RootCount->second.second > StoreMergeDependenceLimit;
17619  };
17620 
17621  auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17622  // This must be a chain use.
17623  if (UseIter.getOperandNo() != 0)
17624  return;
17625  if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17626  BaseIndexOffset Ptr;
17627  int64_t PtrDiff;
17628  if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17629  !OverLimitInDependenceCheck(OtherStore, RootNode))
17630  StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17631  }
17632  };
17633 
17634  // We looking for a root node which is an ancestor to all mergable
17635  // stores. We search up through a load, to our root and then down
17636  // through all children. For instance we will find Store{1,2,3} if
17637  // St is Store1, Store2. or Store3 where the root is not a load
17638  // which always true for nonvolatile ops. TODO: Expand
17639  // the search to find all valid candidates through multiple layers of loads.
17640  //
17641  // Root
17642  // |-------|-------|
17643  // Load Load Store3
17644  // | |
17645  // Store1 Store2
17646  //
17647  // FIXME: We should be able to climb and
17648  // descend TokenFactors to find candidates as well.
17649 
17650  RootNode = St->getChain().getNode();
17651 
17652  unsigned NumNodesExplored = 0;
17653  const unsigned MaxSearchNodes = 1024;
17654  if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17655  RootNode = Ldn->getChain().getNode();
17656  for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17657  I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17658  if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17659  for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17660  TryToAddCandidate(I2);
17661  }
17662  // Check stores that depend on the root (e.g. Store 3 in the chart above).
17663  if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
17664  TryToAddCandidate(I);
17665  }
17666  }
17667  } else {
17668  for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17669  I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17670  TryToAddCandidate(I);
17671  }
17672 }
17673 
17674 // We need to check that merging these stores does not cause a loop in
17675 // the DAG. Any store candidate may depend on another candidate
17676 // indirectly through its operand (we already consider dependencies
17677 // through the chain). Check in parallel by searching up from
17678 // non-chain operands of candidates.
17679 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17680  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17681  SDNode *RootNode) {
17682  // FIXME: We should be able to truncate a full search of
17683  // predecessors by doing a BFS and keeping tabs the originating
17684  // stores from which worklist nodes come from in a similar way to
17685  // TokenFactor simplfication.
17686 
17689 
17690  // RootNode is a predecessor to all candidates so we need not search
17691  // past it. Add RootNode (peeking through TokenFactors). Do not count
17692  // these towards size check.
17693 
17694  Worklist.push_back(RootNode);
17695  while (!Worklist.empty()) {
17696  auto N = Worklist.pop_back_val();
17697  if (!Visited.insert(N).second)
17698  continue; // Already present in Visited.
17699  if (N->getOpcode() == ISD::TokenFactor) {
17700  for (SDValue Op : N->ops())
17701  Worklist.push_back(Op.getNode());
17702  }
17703  }
17704 
17705  // Don't count pruning nodes towards max.
17706  unsigned int Max = 1024 + Visited.size();
17707  // Search Ops of store candidates.
17708  for (unsigned i = 0; i < NumStores; ++i) {
17709  SDNode *N = StoreNodes[i].MemNode;
17710  // Of the 4 Store Operands:
17711  // * Chain (Op 0) -> We have already considered these
17712  // in candidate selection and can be
17713  // safely ignored
17714  // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17715  // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17716  // but aren't necessarily fromt the same base node, so
17717  // cycles possible (e.g. via indexed store).
17718  // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17719  // non-indexed stores). Not constant on all targets (e.g. ARM)
17720  // and so can participate in a cycle.
17721  for (unsigned j = 1; j < N->getNumOperands(); ++j)
17722  Worklist.push_back(N->getOperand(j).getNode());
17723  }
17724  // Search through DAG. We can stop early if we find a store node.
17725  for (unsigned i = 0; i < NumStores; ++i)
17726  if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17727  Max)) {
17728  // If the searching bail out, record the StoreNode and RootNode in the
17729  // StoreRootCountMap. If we have seen the pair many times over a limit,
17730  // we won't add the StoreNode into StoreNodes set again.
17731  if (Visited.size() >= Max) {
17732  auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17733  if (RootCount.first == RootNode)
17734  RootCount.second++;
17735  else
17736  RootCount = {RootNode, 1};
17737  }
17738  return false;
17739  }
17740  return true;
17741 }
17742 
17743 unsigned
17744 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17745  int64_t ElementSizeBytes) const {
17746  while (true) {
17747  // Find a store past the width of the first store.
17748  size_t StartIdx = 0;
17749  while ((StartIdx + 1 < StoreNodes.size()) &&
17750  StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17751  StoreNodes[StartIdx + 1].OffsetFromBase)
17752  ++StartIdx;
17753 
17754  // Bail if we don't have enough candidates to merge.
17755  if (StartIdx + 1 >= StoreNodes.size())
17756  return 0;
17757 
17758  // Trim stores that overlapped with the first store.
17759  if (StartIdx)
17760  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17761 
17762  // Scan the memory operations on the chain and find the first
17763  // non-consecutive store memory address.
17764  unsigned NumConsecutiveStores = 1;
17765  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17766  // Check that the addresses are consecutive starting from the second
17767  // element in the list of stores.
17768  for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17769  int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17770  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17771  break;
17772  NumConsecutiveStores = i + 1;
17773  }
17774  if (NumConsecutiveStores > 1)
17775  return NumConsecutiveStores;
17776 
17777  // There are no consecutive stores at the start of the list.
17778  // Remove the first store and try again.
17779  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17780  }
17781 }
17782 
17783 bool DAGCombiner::tryStoreMergeOfConstants(
17784  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17785  EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17786  LLVMContext &Context = *DAG.getContext();
17787  const DataLayout &DL = DAG.getDataLayout();
17788  int64_t ElementSizeBytes = MemVT.getStoreSize();
17789  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17790  bool MadeChange = false;
17791 
17792  // Store the constants into memory as one consecutive store.
17793  while (NumConsecutiveStores >= 2) {
17794  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17795  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17796  unsigned FirstStoreAlign = FirstInChain->getAlignment();
17797  unsigned LastLegalType = 1;
17798  unsigned LastLegalVectorType = 1;
17799  bool LastIntegerTrunc = false;
17800  bool NonZero = false;
17801  unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17802  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17803  StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17804  SDValue StoredVal = ST->getValue();
17805  bool IsElementZero = false;
17806  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17807  IsElementZero = C->isZero();
17808  else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17809  IsElementZero = C->getConstantFPValue()->isNullValue();
17810  if (IsElementZero) {
17811  if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17812  FirstZeroAfterNonZero = i;
17813  }
17814  NonZero |= !IsElementZero;
17815 
17816  // Find a legal type for the constant store.
17817  unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17818  EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17819  bool IsFast = false;
17820 
17821  // Break early when size is too large to be legal.
17822  if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17823  break;
17824 
17825  if (TLI.isTypeLegal(StoreTy) &&
17826  TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17827  DAG.getMachineFunction()) &&
17828  TLI.allowsMemoryAccess(Context, DL, StoreTy,
17829  *FirstInChain->getMemOperand(), &IsFast) &&
17830  IsFast) {
17831  LastIntegerTrunc = false;
17832  LastLegalType = i + 1;
17833  // Or check whether a truncstore is legal.
17834  } else if (TLI.getTypeAction(Context, StoreTy) ==
17836  EVT LegalizedStoredValTy =
17837  TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17838  if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17839  TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17840  DAG.getMachineFunction()) &&
17841  TLI.allowsMemoryAccess(Context, DL, StoreTy,
17842  *FirstInChain->getMemOperand(), &IsFast) &&
17843  IsFast) {
17844  LastIntegerTrunc = true;
17845  LastLegalType = i + 1;
17846  }
17847  }
17848 
17849  // We only use vectors if the constant is known to be zero or the
17850  // target allows it and the function is not marked with the
17851  // noimplicitfloat attribute.
17852  if ((!NonZero ||
17853  TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17854  AllowVectors) {
17855  // Find a legal type for the vector store.
17856  unsigned Elts = (i + 1) * NumMemElts;
17857  EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17858  if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17859  TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17860  TLI.allowsMemoryAccess(Context, DL, Ty,
17861  *FirstInChain->getMemOperand(), &IsFast) &&
17862  IsFast)
17863  LastLegalVectorType = i + 1;
17864  }
17865  }
17866 
17867  bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17868  unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17869  bool UseTrunc = LastIntegerTrunc && !UseVector;
17870 
17871  // Check if we found a legal integer type that creates a meaningful
17872  // merge.
17873  if (NumElem < 2) {
17874  // We know that candidate stores are in order and of correct
17875  // shape. While there is no mergeable sequence from the
17876  // beginning one may start later in the sequence. The only
17877  // reason a merge of size N could have failed where another of
17878  // the same size would not have, is if the alignment has
17879  // improved or we've dropped a non-zero value. Drop as many
17880  // candidates as we can here.
17881  unsigned NumSkip = 1;
17882  while ((NumSkip < NumConsecutiveStores) &&
17883  (NumSkip < FirstZeroAfterNonZero) &&
17884  (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17885  NumSkip++;
17886 
17887  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17888  NumConsecutiveStores -= NumSkip;
17889  continue;
17890  }
17891 
17892  // Check that we can merge these candidates without causing a cycle.
17893  if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17894  RootNode)) {
17895  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17896  NumConsecutiveStores -= NumElem;
17897  continue;
17898  }
17899 
17900  MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17901  /*IsConstantSrc*/ true,
17902  UseVector, UseTrunc);
17903 
17904  // Remove merged stores for next iteration.
17905  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17906  NumConsecutiveStores -= NumElem;
17907  }
17908  return MadeChange;
17909 }
17910 
17911 bool DAGCombiner::tryStoreMergeOfExtracts(
17912  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17913  EVT MemVT, SDNode *RootNode) {
17914  LLVMContext &Context = *DAG.getContext();
17915  const DataLayout &DL = DAG.getDataLayout();
17916  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17917  bool MadeChange = false;
17918 
17919  // Loop on Consecutive Stores on success.
17920  while (NumConsecutiveStores >= 2) {
17921  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17922  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17923  unsigned FirstStoreAlign = FirstInChain->getAlignment();
17924  unsigned NumStoresToMerge = 1;
17925  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17926  // Find a legal type for the vector store.
17927  unsigned Elts = (i + 1) * NumMemElts;
17928  EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17929  bool IsFast = false;
17930 
17931  // Break early when size is too large to be legal.
17932  if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17933  break;
17934 
17935  if (TLI.isTypeLegal(Ty) &&
17936  TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17937  TLI.allowsMemoryAccess(Context, DL, Ty,
17938  *FirstInChain->getMemOperand(), &IsFast) &&
17939  IsFast)
17940  NumStoresToMerge = i + 1;
17941  }
17942 
17943  // Check if we found a legal integer type creating a meaningful
17944  // merge.
17945  if (NumStoresToMerge < 2) {
17946  // We know that candidate stores are in order and of correct
17947  // shape. While there is no mergeable sequence from the
17948  // beginning one may start later in the sequence. The only
17949  // reason a merge of size N could have failed where another of
17950  // the same size would not have, is if the alignment has
17951  // improved. Drop as many candidates as we can here.
17952  unsigned NumSkip = 1;
17953  while ((NumSkip < NumConsecutiveStores) &&
17954  (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17955  NumSkip++;
17956 
17957  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17958  NumConsecutiveStores -= NumSkip;
17959  continue;
17960  }
17961 
17962  // Check that we can merge these candidates without causing a cycle.
17963  if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17964  RootNode)) {
17965  StoreNodes.erase(StoreNodes.begin(),
17966  StoreNodes.begin() + NumStoresToMerge);
17967  NumConsecutiveStores -= NumStoresToMerge;
17968  continue;
17969  }
17970 
17971  MadeChange |= mergeStoresOfConstantsOrVecElts(
17972  StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17973  /*UseVector*/ true, /*UseTrunc*/ false);
17974 
17975  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17976  NumConsecutiveStores -= NumStoresToMerge;
17977  }
17978  return MadeChange;
17979 }
17980 
17981 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17982  unsigned NumConsecutiveStores, EVT MemVT,
17983  SDNode *RootNode, bool AllowVectors,
17984  bool IsNonTemporalStore,
17985  bool IsNonTemporalLoad) {
17986  LLVMContext &Context = *DAG.getContext();
17987  const DataLayout &DL = DAG.getDataLayout();
17988  int64_t ElementSizeBytes = MemVT.getStoreSize();
17989  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17990  bool MadeChange = false;
17991 
17992  // Look for load nodes which are used by the stored values.
17993  SmallVector<MemOpLink, 8> LoadNodes;
17994 
17995  // Find acceptable loads. Loads need to have the same chain (token factor),
17996  // must not be zext, volatile, indexed, and they must be consecutive.
17997  BaseIndexOffset LdBasePtr;
17998 
17999  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18000  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
18001  SDValue Val = peekThroughBitcasts(St->getValue());
18002  LoadSDNode *Ld = cast<LoadSDNode>(Val);
18003 
18004  BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
18005  // If this is not the first ptr that we check.
18006  int64_t LdOffset = 0;
18007  if (LdBasePtr.getBase().getNode()) {
18008  // The base ptr must be the same.
18009  if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
18010  break;
18011  } else {
18012  // Check that all other base pointers are the same as this one.
18013  LdBasePtr = LdPtr;
18014  }
18015 
18016  // We found a potential memory operand to merge.
18017  LoadNodes.push_back(MemOpLink(Ld, LdOffset));
18018  }
18019 
18020  while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
18021  Align RequiredAlignment;
18022  bool NeedRotate = false;
18023  if (LoadNodes.size() == 2) {
18024  // If we have load/store pair instructions and we only have two values,
18025  // don't bother merging.
18026  if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
18027  StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
18028  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
18029  LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
18030  break;
18031  }
18032  // If the loads are reversed, see if we can rotate the halves into place.
18033  int64_t Offset0 = LoadNodes[0].OffsetFromBase;
18034  int64_t Offset1 = LoadNodes[1].OffsetFromBase;
18035  EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
18036  if (Offset0 - Offset1 == ElementSizeBytes &&
18037  (hasOperation(ISD::ROTL, PairVT) ||
18038  hasOperation(ISD::ROTR, PairVT))) {
18039  std::swap(LoadNodes[0], LoadNodes[1]);
18040  NeedRotate = true;
18041  }
18042  }
18043  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18044  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18045  Align FirstStoreAlign = FirstInChain->getAlign();
18046  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
18047 
18048  // Scan the memory operations on the chain and find the first
18049  // non-consecutive load memory address. These variables hold the index in
18050  // the store node array.
18051 
18052  unsigned LastConsecutiveLoad = 1;
18053 
18054  // This variable refers to the size and not index in the array.
18055  unsigned LastLegalVectorType = 1;
18056  unsigned LastLegalIntegerType = 1;
18057  bool isDereferenceable = true;
18058  bool DoIntegerTruncate = false;
18059  int64_t StartAddress = LoadNodes[0].OffsetFromBase;
18060  SDValue LoadChain = FirstLoad->getChain();
18061  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
18062  // All loads must share the same chain.
18063  if (LoadNodes[i].MemNode->getChain() != LoadChain)
18064  break;
18065 
18066  int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
18067  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18068  break;
18069  LastConsecutiveLoad = i;
18070 
18071  if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
18072  isDereferenceable = false;
18073 
18074  // Find a legal type for the vector store.
18075  unsigned Elts = (i + 1) * NumMemElts;
18076  EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18077 
18078  // Break early when size is too large to be legal.
18079  if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18080  break;
18081 
18082  bool IsFastSt = false;
18083  bool IsFastLd = false;
18084  // Don't try vector types if we need a rotate. We may still fail the
18085  // legality checks for the integer type, but we can't handle the rotate
18086  // case with vectors.
18087  // FIXME: We could use a shuffle in place of the rotate.
18088  if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
18089  TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18090  DAG.getMachineFunction()) &&
18091  TLI.allowsMemoryAccess(Context, DL, StoreTy,
18092  *FirstInChain->getMemOperand(), &IsFastSt) &&
18093  IsFastSt &&
18094  TLI.allowsMemoryAccess(Context, DL, StoreTy,
18095  *FirstLoad->getMemOperand(), &IsFastLd) &&
18096  IsFastLd) {
18097  LastLegalVectorType = i + 1;
18098  }
18099 
18100  // Find a legal type for the integer store.
18101  unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18102  StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18103  if (TLI.isTypeLegal(StoreTy) &&
18104  TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18105  DAG.getMachineFunction()) &&
18106  TLI.allowsMemoryAccess(Context, DL, StoreTy,
18107  *FirstInChain->getMemOperand(), &IsFastSt) &&
18108  IsFastSt &&
18109  TLI.allowsMemoryAccess(Context, DL, StoreTy,
18110  *FirstLoad->getMemOperand(), &IsFastLd) &&
18111  IsFastLd) {
18112  LastLegalIntegerType = i + 1;
18113  DoIntegerTruncate = false;
18114  // Or check whether a truncstore and extload is legal.
18115  } else if (TLI.getTypeAction(Context, StoreTy) ==
18117  EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
18118  if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18119  TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18120  DAG.getMachineFunction()) &&
18121  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18122  TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18123  TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
18124  TLI.allowsMemoryAccess(Context, DL, StoreTy,
18125  *FirstInChain->getMemOperand(), &IsFastSt) &&
18126  IsFastSt &&
18127  TLI.allowsMemoryAccess(Context, DL, StoreTy,
18128  *FirstLoad->getMemOperand(), &IsFastLd) &&
18129  IsFastLd) {
18130  LastLegalIntegerType = i + 1;
18131  DoIntegerTruncate = true;
18132  }
18133  }
18134  }
18135 
18136  // Only use vector types if the vector type is larger than the integer
18137  // type. If they are the same, use integers.
18138  bool UseVectorTy =
18139  LastLegalVectorType > LastLegalIntegerType && AllowVectors;
18140  unsigned LastLegalType =
18141  std::max(LastLegalVectorType, LastLegalIntegerType);
18142 
18143  // We add +1 here because the LastXXX variables refer to location while
18144  // the NumElem refers to array/index size.
18145  unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18146  NumElem = std::min(LastLegalType, NumElem);
18147  Align FirstLoadAlign = FirstLoad->getAlign();
18148 
18149  if (NumElem < 2) {
18150  // We know that candidate stores are in order and of correct
18151  // shape. While there is no mergeable sequence from the
18152  // beginning one may start later in the sequence. The only
18153  // reason a merge of size N could have failed where another of
18154  // the same size would not have is if the alignment or either
18155  // the load or store has improved. Drop as many candidates as we
18156  // can here.
18157  unsigned NumSkip = 1;
18158  while ((NumSkip < LoadNodes.size()) &&
18159  (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18160  (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18161  NumSkip++;
18162  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18163  LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18164  NumConsecutiveStores -= NumSkip;
18165  continue;
18166  }
18167 
18168  // Check that we can merge these candidates without causing a cycle.
18169  if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18170  RootNode)) {
18171  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18172  LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18173  NumConsecutiveStores -= NumElem;
18174  continue;
18175  }
18176 
18177  // Find if it is better to use vectors or integers to load and store
18178  // to memory.
18179  EVT JointMemOpVT;
18180  if (UseVectorTy) {
18181  // Find a legal type for the vector store.
18182  unsigned Elts = NumElem * NumMemElts;
18183  JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18184  } else {
18185  unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18186  JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18187  }
18188 
18189  SDLoc LoadDL(LoadNodes[0].MemNode);
18190  SDLoc StoreDL(StoreNodes[0].MemNode);
18191 
18192  // The merged loads are required to have the same incoming chain, so
18193  // using the first's chain is acceptable.
18194 
18195  SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
18196  AddToWorklist(NewStoreChain.getNode());
18197 
18198  MachineMemOperand::Flags LdMMOFlags =
18199  isDereferenceable ? MachineMemOperand::MODereferenceable
18201  if (IsNonTemporalLoad)
18202  LdMMOFlags |= MachineMemOperand::MONonTemporal;
18203 
18204  MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
18207 
18208  SDValue NewLoad, NewStore;
18209  if (UseVectorTy || !DoIntegerTruncate) {
18210  NewLoad = DAG.getLoad(
18211  JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18212  FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18213  SDValue StoreOp = NewLoad;
18214  if (NeedRotate) {
18215  unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18216  assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
18217  "Unexpected type for rotate-able load pair");
18218  SDValue RotAmt =
18219  DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
18220  // Target can convert to the identical ROTR if it does not have ROTL.
18221  StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
18222  }
18223  NewStore = DAG.getStore(
18224  NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18225  FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18226  } else { // This must be the truncstore/extload case
18227  EVT ExtendedTy =
18228  TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
18229  NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
18230  FirstLoad->getChain(), FirstLoad->getBasePtr(),
18231  FirstLoad->getPointerInfo(), JointMemOpVT,
18232  FirstLoadAlign, LdMMOFlags);
18233  NewStore = DAG.getTruncStore(
18234  NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18235  FirstInChain->getPointerInfo(), JointMemOpVT,
18236  FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18237  }
18238 
18239  // Transfer chain users from old loads to the new load.
18240  for (unsigned i = 0; i < NumElem; ++i) {
18241  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18243  SDValue(NewLoad.getNode(), 1));
18244  }
18245 
18246  // Replace all stores with the new store. Recursively remove corresponding
18247  // values if they are no longer used.
18248  for (unsigned i = 0; i < NumElem; ++i) {
18249  SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18250  CombineTo(StoreNodes[i].MemNode, NewStore);
18251  if (Val.getNode()->use_empty())
18252  recursivelyDeleteUnusedNodes(Val.getNode());
18253  }
18254 
18255  MadeChange = true;
18256  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18257  LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18258  NumConsecutiveStores -= NumElem;
18259  }
18260  return MadeChange;
18261 }
18262 
18263 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18264  if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18265  return false;
18266 
18267  // TODO: Extend this function to merge stores of scalable vectors.
18268  // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18269  // store since we know <vscale x 16 x i8> is exactly twice as large as
18270  // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18271  EVT MemVT = St->getMemoryVT();
18272  if (MemVT.isScalableVector())
18273  return false;
18274  if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18275  return false;
18276 
18277  // This function cannot currently deal with non-byte-sized memory sizes.
18278  int64_t ElementSizeBytes = MemVT.getStoreSize();
18279  if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18280  return false;
18281 
18282  // Do not bother looking at stored values that are not constants, loads, or
18283  // extracted vector elements.
18284  SDValue StoredVal = peekThroughBitcasts(St->getValue());
18285  const StoreSource StoreSrc = getStoreSource(StoredVal);
18286  if (StoreSrc == StoreSource::Unknown)
18287  return false;
18288 
18289  SmallVector<MemOpLink, 8> StoreNodes;
18290  SDNode *RootNode;
18291  // Find potential store merge candidates by searching through chain sub-DAG
18292  getStoreMergeCandidates(St, StoreNodes, RootNode);
18293 
18294  // Check if there is anything to merge.
18295  if (StoreNodes.size() < 2)
18296  return false;
18297 
18298  // Sort the memory operands according to their distance from the
18299  // base pointer.
18300  llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18301  return LHS.OffsetFromBase < RHS.OffsetFromBase;
18302  });
18303 
18304  bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
18305  Attribute::NoImplicitFloat);
18306  bool IsNonTemporalStore = St->isNonTemporal();
18307  bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18308  cast<LoadSDNode>(StoredVal)->isNonTemporal();
18309 
18310  // Store Merge attempts to merge the lowest stores. This generally
18311  // works out as if successful, as the remaining stores are checked
18312  // after the first collection of stores is merged. However, in the
18313  // case that a non-mergeable store is found first, e.g., {p[-2],
18314  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
18315  // mergeable cases. To prevent this, we prune such stores from the
18316  // front of StoreNodes here.
18317  bool MadeChange = false;
18318  while (StoreNodes.size() > 1) {
18319  unsigned NumConsecutiveStores =
18320  getConsecutiveStores(StoreNodes, ElementSizeBytes);
18321  // There are no more stores in the list to examine.
18322  if (NumConsecutiveStores == 0)
18323  return MadeChange;
18324 
18325  // We have at least 2 consecutive stores. Try to merge them.
18326  assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18327  switch (StoreSrc) {
18328  case StoreSource::Constant:
18329  MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
18330  MemVT, RootNode, AllowVectors);
18331  break;
18332 
18333  case StoreSource::Extract:
18334  MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
18335  MemVT, RootNode);
18336  break;
18337 
18338  case StoreSource::Load:
18339  MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
18340  MemVT, RootNode, AllowVectors,
18341  IsNonTemporalStore, IsNonTemporalLoad);
18342  break;
18343 
18344  default:
18345  llvm_unreachable("Unhandled store source type");
18346  }
18347  }
18348  return MadeChange;
18349 }
18350 
18351 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18352  SDLoc SL(ST);
18353  SDValue ReplStore;
18354 
18355  // Replace the chain to avoid dependency.
18356  if (ST->isTruncatingStore()) {
18357  ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18358  ST->getBasePtr(), ST->getMemoryVT(),
18359  ST->getMemOperand());
18360  } else {
18361  ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18362  ST->getMemOperand());
18363  }
18364 
18365  // Create token to keep both nodes around.
18366  SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18367  MVT::Other, ST->getChain(), ReplStore);
18368 
18369  // Make sure the new and old chains are cleaned up.
18370  AddToWorklist(Token.getNode());
18371 
18372  // Don't add users to work list.
18373  return CombineTo(ST, Token, false);
18374 }
18375 
18376 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18377  SDValue Value = ST->getValue();
18378  if (Value.getOpcode() == ISD::TargetConstantFP)
18379  return SDValue();
18380 
18381  if (!ISD::isNormalStore(ST))
18382  return SDValue();
18383 
18384  SDLoc DL(ST);
18385 
18386  SDValue Chain = ST->getChain();
18387  SDValue Ptr = ST->getBasePtr();
18388 
18389  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18390 
18391  // NOTE: If the original store is volatile, this transform must not increase
18392  // the number of stores. For example, on x86-32 an f64 can be stored in one
18393  // processor operation but an i64 (which is not legal) requires two. So the
18394  // transform should not be done in this case.
18395 
18396  SDValue Tmp;
18397  switch (CFP->getSimpleValueType(0).SimpleTy) {
18398  default:
18399  llvm_unreachable("Unknown FP type");
18400  case MVT::f16: // We don't do this for these yet.
18401  case MVT::f80:
18402  case MVT::f128:
18403  case MVT::ppcf128:
18404  return SDValue();
18405  case MVT::f32:
18406  if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18408  ;
18409  Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18410  bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18411  MVT::i32);
18412  return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18413  }
18414 
18415  return SDValue();
18416  case MVT::f64:
18417  if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18418  ST->isSimple()) ||
18420  ;
18421  Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18422  getZExtValue(), SDLoc(CFP), MVT::i64);
18423  return DAG.getStore(Chain, DL, Tmp,
18424  Ptr, ST->getMemOperand());
18425  }
18426 
18427  if (ST->isSimple() &&
18429  // Many FP stores are not made apparent until after legalize, e.g. for
18430  // argument passing. Since this is so common, custom legalize the
18431  // 64-bit integer store into two 32-bit stores.
18433  SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18434  SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18435  if (DAG.getDataLayout().isBigEndian())
18436  std::swap(Lo, Hi);
18437 
18438  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18439  AAMDNodes AAInfo = ST->getAAInfo();
18440 
18441  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18442  ST->getOriginalAlign(), MMOFlags, AAInfo);
18443  Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18444  SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18445  ST->getPointerInfo().getWithOffset(4),
18446  ST->getOriginalAlign(), MMOFlags, AAInfo);
18447  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18448  St0, St1);
18449  }
18450 
18451  return SDValue();
18452  }
18453 }
18454 
/// Main combine entry point for ISD::STORE nodes. Applies a sequence of
/// independent simplifications (bitcast folding, dead-store elimination,
/// truncstore narrowing, store merging, indexed-store formation, FP-constant
/// replacement) and returns the replacement value, or an empty SDValue if no
/// combine fired.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (((!LegalOperations && ST->isSimple()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
                                     DAG, *ST->getMemOperand())) {
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                          ST->getMemOperand());
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
    if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
      if (*Alignment > ST->getAlign() &&
          isAligned(*Alignment, ST->getSrcValueOffset())) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), *Alignment,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  // Try transforming several stores into STORE (BSWAP).
  if (SDValue Store = mergeTruncStores(ST))
    return Store;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // Re-read the chain: findBetterNeighborChains may have rewritten it.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Convert a truncating store of a extension into a standard store.
    if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
         Value.getOpcode() == ISD::SIGN_EXTEND ||
         Value.getOpcode() == ISD::ANY_EXTEND) &&
        Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
        TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                          ST->getMemOperand());

    // Only the bits below the memory width survive a truncating store.
    APInt TruncDemandedBits =
        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                             ST->getMemoryVT().getScalarSizeInBits());

    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    AddToWorklist(Value.getNode());
    if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
                               ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  // TODO: Can relax for unordered atomics (see D66309)
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && ST->isSimple() &&
        Ld->getAddressSpace() == ST->getAddressSpace() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // TODO: Can relax for unordered atomics (see D66309)
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && ST->isSimple() &&
        ST1->isUnindexed() && ST1->isSimple()) {
      if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
          ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
          ST->getAddressSpace() == ST1->getAddressSpace()) {
        // If this is a store followed by a store with the same value to the
        // same location, then the store is dead/noop.
        return Chain;
      }

      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef() &&
          // BaseIndexOffset and the code below requires knowing the size
          // of a vector, so bail out if MemoryVT is scalable.
          !ST->getMemoryVT().isScalableVector() &&
          !ST1->getMemoryVT().isScalableVector() &&
          ST->getAddressSpace() == ST1->getAddressSpace()) {
        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
        unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
        unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
        // If this is a store who's preceding store to a subset of the current
        // location and no one other node is chained to that store we can
        // effectively drop the store. Do not remove stores to undef as they may
        // be used as data sinks.
        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
          CombineTo(ST1, ST1->getChain());
          return SDValue();
        }
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND ||
       Value.getOpcode() == ISD::TRUNCATE) &&
      Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
                               ST->getMemoryVT(), LegalOperations)) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = mergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  // preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Split a 64-bit "merged pair" store into two narrower stores if profitable.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  // Finally, try narrowing the store width itself.
  return ReduceLoadOpStoreWidth(N);
}
18657 
18658 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18659  const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18660  if (!LifetimeEnd->hasOffset())
18661  return SDValue();
18662 
18663  const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18664  LifetimeEnd->getOffset(), false);
18665 
18666  // We walk up the chains to find stores.
18667  SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18668  while (!Chains.empty()) {
18669  SDValue Chain = Chains.pop_back_val();
18670  if (!Chain.hasOneUse())
18671  continue;
18672  switch (Chain.getOpcode()) {
18673  case ISD::TokenFactor:
18674  for (unsigned Nops = Chain.getNumOperands(); Nops;)
18675  Chains.push_back(Chain.getOperand(--Nops));
18676  break;
18677  case ISD::LIFETIME_START:
18678  case ISD::LIFETIME_END:
18679  // We can forward past any lifetime start/end that can be proven not to
18680  // alias the node.
18681  if (!mayAlias(Chain.getNode(), N))
18682  Chains.push_back(Chain.getOperand(0));
18683  break;
18684  case ISD::STORE: {
18685  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
18686  // TODO: Can relax for unordered atomics (see D66309)
18687  if (!ST->isSimple() || ST->isIndexed())
18688  continue;
18689  const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18690  // The bounds of a scalable store are not known until runtime, so this
18691  // store cannot be elided.
18692  if (StoreSize.isScalable())
18693  continue;
18694  const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18695  // If we store purely within object bounds just before its lifetime ends,
18696  // we can remove the store.
18697  if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18698  StoreSize.getFixedSize() * 8)) {
18699  LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18700  dbgs() << "\nwithin LIFETIME_END of : ";
18701  LifetimeEndBase.dump(); dbgs() << "\n");
18702  CombineTo(ST, ST->getChain());
18703  return SDValue(N, 0);
18704  }
18705  }
18706  }
18707  }
18708  return SDValue();
18709 }
18710 
18711 /// For the instruction sequence of store below, F and I values
18712 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
18714 /// which can remove the bitwise instructions or sink them to colder places.
18715 ///
18716 /// (store (or (zext (bitcast F to i32) to i64),
18717 /// (shl (zext I to i64), 32)), addr) -->
18718 /// (store F, addr) and (store I, addr+4)
18719 ///
18720 /// Similarly, splitting for other merged store can also be beneficial, like:
18721 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18722 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18723 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18724 /// For pair of {i16, i8}, i32 store --> two i16 stores.
18725 /// For pair of {i8, i8}, i16 store --> two i8 stores.
18726 ///
18727 /// We allow each target to determine specifically which kind of splitting is
18728 /// supported.
18729 ///
18730 /// The store patterns are commonly seen from the simple code snippet below
18731 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
18732 /// void goo(const std::pair<int, float> &);
18733 /// hoo() {
18734 /// ...
18735 /// goo(std::make_pair(tmp, ftmp));
18736 /// ...
18737 /// }
18738 ///
// NOTE(review): the signature line (original 18739) is missing from this
// extract; this is the body of DAGCombiner::splitMergedValStore(StoreSDNode
// *ST). It rewrites a store of (or (zext Lo), (shl (zext Hi), Half)) as two
// half-width stores when the target reports that is cheaper than merging bits.
// Returns the new store chain on success, or an empty SDValue to decline.
// This is a pure optimization; skip it entirely at -O0.
18740  if (OptLevel == CodeGenOpt::None)
18741  return SDValue();
18742 
18743  // Can't change the number of memory accesses for a volatile store or break
18744  // atomicity for an atomic one.
18745  if (!ST->isSimple())
18746  return SDValue();
18747 
18748  SDValue Val = ST->getValue();
18749  SDLoc DL(ST);
18750 
18751  // Match OR operand.
18752  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18753  return SDValue();
18754 
18755  // Match SHL operand and get Lower and Higher parts of Val.
18756  SDValue Op1 = Val.getOperand(0);
18757  SDValue Op2 = Val.getOperand(1);
18758  SDValue Lo, Hi;
// Canonicalize so Op1 is the SHL (high half source); the OR is commutative,
// so try both operand orders before giving up.
18759  if (Op1.getOpcode() != ISD::SHL) {
18760  std::swap(Op1, Op2);
18761  if (Op1.getOpcode() != ISD::SHL)
18762  return SDValue();
18763  }
18764  Lo = Op2;
18765  Hi = Op1.getOperand(0);
// The SHL must become dead after the split, otherwise the merged value is
// still computed and the transform only adds a store.
18766  if (!Op1.hasOneUse())
18767  return SDValue();
18768 
18769  // Match shift amount to HalfValBitSize.
18770  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18771  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18772  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18773  return SDValue();
18774 
18775  // Lo and Hi are zero-extended from int with size less equal than 32
18776  // to i64.
18777  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18778  !Lo.getOperand(0).getValueType().isScalarInteger() ||
18779  Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18780  Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18781  !Hi.getOperand(0).getValueType().isScalarInteger() ||
18782  Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18783  return SDValue();
18784 
18785  // Use the EVT of low and high parts before bitcast as the input
18786  // of target query.
18787  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18788  ? Lo.getOperand(0).getValueType()
18789  : Lo.getValueType();
18790  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18791  ? Hi.getOperand(0).getValueType()
18792  : Hi.getValueType();
// Ask the target whether two narrow stores beat the shift+or+wide-store.
18793  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18794  return SDValue();
18795 
18796  // Start to split store.
18797  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18798  AAMDNodes AAInfo = ST->getAAInfo();
18799 
18800  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18801  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18802  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18803  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18804 
18805  SDValue Chain = ST->getChain();
18806  SDValue Ptr = ST->getBasePtr();
18807  // Lower value store.
18808  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18809  ST->getOriginalAlign(), MMOFlags, AAInfo);
// Advance the pointer by half the value width (in bytes) for the high part.
// NOTE(review): this stores Lo at the base address and Hi above it, i.e. a
// little-endian layout of the merged value — consistent with the (shl Hi)
// pattern matched above.
18810  Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18811  // Higher value store.
18812  SDValue St1 = DAG.getStore(
18813  St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18814  ST->getOriginalAlign(), MMOFlags, AAInfo);
18815  return St1;
18816 }
18817 
18818 /// Convert a disguised subvector insertion into a shuffle:
// Two patterns are handled:
//  1) insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, C), Idx
//     --> updated vector_shuffle (the extracted lane is folded into the mask);
//  2) insert_vector_elt V, (bitcast <subvec>), Idx
//     --> bitcast (shuffle (bitcast V), (padded subvec)).
// Returns an empty SDValue when neither pattern applies or the target rejects
// the resulting shuffle mask.
18819 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
// FIXME(review): the assert message says "Expected extract_vector_elt" but the
// opcode actually checked is INSERT_VECTOR_ELT — the message looks copy-pasted
// from a sibling function and should read "Expected insert_vector_elt".
18820  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18821  "Expected extract_vector_elt");
18822  SDValue InsertVal = N->getOperand(1);
18823  SDValue Vec = N->getOperand(0);
18824 
18825  // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18826  // InsIndex)
18827  // --> (vector_shuffle X, Y) and variations where shuffle operands may be
18828  // CONCAT_VECTORS.
18829  if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18830  InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18831  isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18832  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18833  ArrayRef<int> Mask = SVN->getMask();
18834 
18835  SDValue X = Vec.getOperand(0);
18836  SDValue Y = Vec.getOperand(1);
18837 
18838  // Vec's operand 0 is using indices from 0 to N-1 and
18839  // operand 1 from N to 2N - 1, where N is the number of
18840  // elements in the vectors.
18841  SDValue InsertVal0 = InsertVal.getOperand(0);
// -1 means "source of the extract not found among the shuffle inputs".
18842  int ElementOffset = -1;
18843 
18844  // We explore the inputs of the shuffle in order to see if we find the
18845  // source of the extract_vector_elt. If so, we can use it to modify the
18846  // shuffle rather than perform an insert_vector_elt.
18847  SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
// Pushed in reverse (Y then X) so X, at mask offset 0, is examined first.
18848  ArgWorkList.emplace_back(Mask.size(), Y);
18849  ArgWorkList.emplace_back(0, X);
18850 
18851  while (!ArgWorkList.empty()) {
18852  int ArgOffset;
18853  SDValue ArgVal;
18854  std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18855 
18856  if (ArgVal == InsertVal0) {
18857  ElementOffset = ArgOffset;
18858  break;
18859  }
18860 
18861  // Peek through concat_vector.
18862  if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
// Walk the concat operands in reverse, subtracting each operand's element
// count, so every piece gets the mask offset of its first lane.
18863  int CurrentArgOffset =
18864  ArgOffset + ArgVal.getValueType().getVectorNumElements();
18865  int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18866  for (SDValue Op : reverse(ArgVal->ops())) {
18867  CurrentArgOffset -= Step;
18868  ArgWorkList.emplace_back(CurrentArgOffset, Op);
18869  }
18870 
18871  // Make sure we went through all the elements and did not screw up index
18872  // computation.
18873  assert(CurrentArgOffset == ArgOffset);
18874  }
18875  }
18876 
18877  if (ElementOffset != -1) {
18878  SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18879 
// Redirect the inserted lane to read directly from the extract's source.
18880  auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18881  NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18882  assert(NewMask[InsIndex] <
18883  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18884  NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18885 
// NOTE(review): the original source line 18887 (the head of the
// TLI.buildLegalVectorShuffle(...) call) is missing from this extract; only
// the trailing arguments of the call survive below.
18886  SDValue LegalShuffle =
18888  Y, NewMask, DAG);
18889  if (LegalShuffle)
18890  return LegalShuffle;
18891  }
18892  }
18893 
18894  // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18895  // bitcast(shuffle (bitcast V), (extended X), Mask)
18896  // Note: We do not use an insert_subvector node because that requires a
18897  // legal subvector type.
18898  if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18899  !InsertVal.getOperand(0).getValueType().isVector())
18900  return SDValue();
18901 
18902  SDValue SubVec = InsertVal.getOperand(0);
18903  SDValue DestVec = N->getOperand(0);
18904  EVT SubVecVT = SubVec.getValueType();
18905  EVT VT = DestVec.getValueType();
18906  unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18907  // If the source only has a single vector element, the cost of creating adding
18908  // it to a vector is likely to exceed the cost of a insert_vector_elt.
18909  if (NumSrcElts == 1)
18910  return SDValue();
// How many copies of the subvector type tile the destination type.
18911  unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18912  unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18913 
18914  // Step 1: Create a shuffle mask that implements this insert operation. The
18915  // vector that we are inserting into will be operand 0 of the shuffle, so
18916  // those elements are just 'i'. The inserted subvector is in the first
18917  // positions of operand 1 of the shuffle. Example:
18918  // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18919  SmallVector<int, 16> Mask(NumMaskVals);
18920  for (unsigned i = 0; i != NumMaskVals; ++i) {
18921  if (i / NumSrcElts == InsIndex)
18922  Mask[i] = (i % NumSrcElts) + NumMaskVals;
18923  else
18924  Mask[i] = i;
18925  }
18926 
18927  // Bail out if the target can not handle the shuffle we want to create.
18928  EVT SubVecEltVT = SubVecVT.getVectorElementType();
18929  EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18930  if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18931  return SDValue();
18932 
18933  // Step 2: Create a wide vector from the inserted source vector by appending
18934  // undefined elements. This is the same size as our destination vector.
18935  SDLoc DL(N);
18936  SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18937  ConcatOps[0] = SubVec;
18938  SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18939 
18940  // Step 3: Shuffle in the padded subvector.
18941  SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18942  SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
// Revisit the intermediate nodes so later combines can simplify them.
18943  AddToWorklist(PaddedSubV.getNode());
18944  AddToWorklist(DestVecBC.getNode());
18945  AddToWorklist(Shuf.getNode());
18946  return DAG.getBitcast(VT, Shuf);
18947 }
18948 
// Combine an INSERT_VECTOR_ELT node: fold out-of-bounds/redundant inserts,
// turn variable-index inserts into splats when profitable, canonicalize chains
// of constant-index inserts, and merge into an existing BUILD_VECTOR/UNDEF
// operand. Returns the replacement value or an empty SDValue.
18949 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18950  SDValue InVec = N->getOperand(0);
18951  SDValue InVal = N->getOperand(1);
18952  SDValue EltNo = N->getOperand(2);
18953  SDLoc DL(N);
18954 
18955  EVT VT = InVec.getValueType();
// Null when the insertion index is not a compile-time constant.
18956  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18957 
18958  // Insert into out-of-bounds element is undefined.
18959  if (IndexC && VT.isFixedLengthVector() &&
18960  IndexC->getZExtValue() >= VT.getVectorNumElements())
18961  return DAG.getUNDEF(VT);
18962 
18963  // Remove redundant insertions:
18964  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18965  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18966  InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18967  return InVec;
18968 
18969  if (!IndexC) {
18970  // If this is variable insert to undef vector, it might be better to splat:
18971  // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18972  if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18973  if (VT.isScalableVector())
18974  return DAG.getSplatVector(VT, DL, InVal);
18975  else {
// NOTE(review): the original source line 18976 (the declaration of the splat
// operand vector `Ops`, filled with copies of InVal) is missing from this
// extract.
18977  return DAG.getBuildVector(VT, DL, Ops);
18978  }
18979  }
// All remaining folds need a constant index.
18980  return SDValue();
18981  }
18982 
// Folds below assume a fixed-width vector.
18983  if (VT.isScalableVector())
18984  return SDValue();
18985 
18986  unsigned NumElts = VT.getVectorNumElements();
18987 
18988  // We must know which element is being inserted for folds below here.
18989  unsigned Elt = IndexC->getZExtValue();
18990  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18991  return Shuf;
18992 
18993  // Canonicalize insert_vector_elt dag nodes.
18994  // Example:
18995  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18996  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18997  //
18998  // Do this only if the child insert_vector node has one use; also
18999  // do this only if indices are both constants and Idx1 < Idx0.
19000  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
19001  && isa<ConstantSDNode>(InVec.getOperand(2))) {
19002  unsigned OtherElt = InVec.getConstantOperandVal(2);
19003  if (Elt < OtherElt) {
19004  // Swap nodes.
19005  SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19006  InVec.getOperand(0), InVal, EltNo);
19007  AddToWorklist(NewOp.getNode());
19008  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
19009  VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
19010  }
19011  }
19012 
19013  // If we can't generate a legal BUILD_VECTOR, exit
19014  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
19015  return SDValue();
19016 
19017  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
19018  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
19019  // vector elements.
// NOTE(review): the original source line 19020 (the declaration
// `SmallVector<SDValue, 8> Ops;`) is missing from this extract.
19021  // Do not combine these two vectors if the output vector will not replace
19022  // the input vector.
19023  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
19024  Ops.append(InVec.getNode()->op_begin(),
19025  InVec.getNode()->op_end());
19026  } else if (InVec.isUndef()) {
19027  Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
19028  } else {
19029  return SDValue();
19030  }
19031  assert(Ops.size() == NumElts && "Unexpected vector size");
19032 
19033  // Insert the element
19034  if (Elt < Ops.size()) {
19035  // All the operands of BUILD_VECTOR must have the same type;
19036  // we enforce that here.
19037  EVT OpVT = Ops[0].getValueType();
19038  Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
19039  }
19040 
19041  // Return the new vector
19042  return DAG.getBuildVector(VT, DL, Ops);
19043 }
19044 
// Replace (extract_vector_elt (load addr), idx) with a narrow scalar load of
// just that element: load addr + idx * eltsize. The original wide load's
// value and chain uses are rewritten in place, so this returns SDValue(EVE, 0)
// on success (the node itself, now re-rooted) or an empty SDValue to decline.
19045 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
19046  SDValue EltNo,
19047  LoadSDNode *OriginalLoad) {
// Callers must not pass volatile/atomic loads.
19048  assert(OriginalLoad->isSimple());
19049 
19050  EVT ResultVT = EVE->getValueType(0);
19051  EVT VecEltVT = InVecVT.getVectorElementType();
19052 
19053  // If the vector element type is not a multiple of a byte then we are unable
19054  // to correctly compute an address to load only the extracted element as a
19055  // scalar.
19056  if (!VecEltVT.isByteSized())
19057  return SDValue();
19058 
19059  ISD::LoadExtType ExtTy =
19060  ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
// Respect target restrictions on both the narrow load and load shrinking.
19061  if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
19062  !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
19063  return SDValue();
19064 
19065  Align Alignment = OriginalLoad->getAlign();
19066  MachinePointerInfo MPI;
19067  SDLoc DL(EVE);
19068  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
// Constant index: the byte offset is known, so keep precise pointer info.
19069  int Elt = ConstEltNo->getZExtValue();
19070  unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
19071  MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
19072  Alignment = commonAlignment(Alignment, PtrOff);
19073  } else {
19074  // Discard the pointer info except the address space because the memory
19075  // operand can't represent this new access since the offset is variable.
19076  MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
19077  Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
19078  }
19079 
// Only proceed if the target allows this access and reports it as fast.
19080  bool IsFast = false;
19081  if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
19082  OriginalLoad->getAddressSpace(), Alignment,
19083  OriginalLoad->getMemOperand()->getFlags(),
19084  &IsFast) ||
19085  !IsFast)
19086  return SDValue();
19087 
19088  SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
19089  InVecVT, EltNo);
19090 
19091  // The replacement we need to do here is a little tricky: we need to
19092  // replace an extractelement of a load with a load.
19093  // Use ReplaceAllUsesOfValuesWith to do the replacement.
19094  // Note that this replacement assumes that the extractvalue is the only
19095  // use of the load; that's okay because we don't want to perform this
19096  // transformation in other cases anyway.
19097  SDValue Load;
19098  SDValue Chain;
19099  if (ResultVT.bitsGT(VecEltVT)) {
19100  // If the result type of vextract is wider than the load, then issue an
19101  // extending load instead.
// Prefer ZEXTLOAD when legal; otherwise EXTLOAD leaves the high bits
// unspecified, which matches extract_vector_elt's widening semantics.
19102  ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
19103  VecEltVT)
19104  ? ISD::ZEXTLOAD
19105  : ISD::EXTLOAD;
19106  Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
19107  OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
19108  Alignment, OriginalLoad->getMemOperand()->getFlags(),
19109  OriginalLoad->getAAInfo());
19110  Chain = Load.getValue(1);
19111  } else {
19112  Load = DAG.getLoad(
19113  VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
19114  OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
19115  Chain = Load.getValue(1);
// Narrower result: truncate; same width but different type: bitcast.
19116  if (ResultVT.bitsLT(VecEltVT))
19117  Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
19118  else
19119  Load = DAG.getBitcast(ResultVT, Load);
19120  }
// Rewrite both the extracted value and the old load's chain in one step so
// chain users stay ordered relative to the new narrow load.
19121  WorklistRemover DeadNodes(*this);
19122  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
19123  SDValue To[] = { Load, Chain };
19124  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
19125  // Make sure to revisit this node to clean it up; it will usually be dead.
19126  AddToWorklist(EVE);
19127  // Since we're explicitly calling ReplaceAllUses, add the new node to the
19128  // worklist explicitly as well.
19129  AddToWorklistWithUsers(Load.getNode());
19130  ++OpsNarrowed;
19131  return SDValue(EVE, 0);
19132 }
19133 
19134 /// Transform a vector binary operation into a scalar binary operation by moving
19135 /// the math/logic after an extract element of a vector.
// NOTE(review): the signature line (original 19136) is missing from this
// extract; this is the file-static helper
// scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, ...), called from
// visitEXTRACT_VECTOR_ELT below.
19137  bool LegalOperations) {
19138  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19139  SDValue Vec = ExtElt->getOperand(0);
19140  SDValue Index = ExtElt->getOperand(1);
19141  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
// Need a constant index, a single-use binop source, and a single-result node.
19142  if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19143  Vec.getNode()->getNumValues() != 1)
19144  return SDValue();
19145 
19146  // Targets may want to avoid this to prevent an expensive register transfer.
19147  if (!TLI.shouldScalarizeBinop(Vec))
19148  return SDValue();
19149 
19150  // Extracting an element of a vector constant is constant-folded, so this
19151  // transform is just replacing a vector op with a scalar op while moving the
19152  // extract.
19153  SDValue Op0 = Vec.getOperand(0);
19154  SDValue Op1 = Vec.getOperand(1);
// Only fire when one side is a (possibly opaque) constant build_vector, so
// that side folds away and no extra extract survives.
19155  if (isAnyConstantBuildVector(Op0, true) ||
19156  isAnyConstantBuildVector(Op1, true)) {
19157  // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19158  // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19159  SDLoc DL(ExtElt);
19160  EVT VT = ExtElt->getValueType(0);
19161  SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19162  SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19163  return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19164  }
19165 
19166  return SDValue();
19167 }
19168 
// Combine an EXTRACT_VECTOR_ELT node. Tries, in order: trivial folds (undef,
// insert_vector_elt round-trips, scalar_to_vector, out-of-bounds, constant
// build_vector lanes), bitcast/truncate folds, scalarizing a binop, looking
// through shuffles, demanded-elts/bits simplification, and finally narrowing
// a vector load to a scalar load of the extracted lane.
19169 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19170  SDValue VecOp = N->getOperand(0);
19171  SDValue Index = N->getOperand(1);
19172  EVT ScalarVT = N->getValueType(0);
19173  EVT VecVT = VecOp.getValueType();
19174  if (VecOp.isUndef())
19175  return DAG.getUNDEF(ScalarVT);
19176 
19177  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
19178  //
19179  // This only really matters if the index is non-constant since other combines
19180  // on the constant elements already work.
19181  SDLoc DL(N);
19182  if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19183  Index == VecOp.getOperand(2)) {
19184  SDValue Elt = VecOp.getOperand(1);
19185  return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19186  }
19187 
19188  // (vextract (scalar_to_vector val, 0) -> val
19189  if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19190  // Only 0'th element of SCALAR_TO_VECTOR is defined.
19191  if (DAG.isKnownNeverZero(Index))
19192  return DAG.getUNDEF(ScalarVT);
19193 
19194  // Check if the result type doesn't match the inserted element type. A
19195  // SCALAR_TO_VECTOR may truncate the inserted element and the
19196  // EXTRACT_VECTOR_ELT may widen the extracted vector.
19197  SDValue InOp = VecOp.getOperand(0);
19198  if (InOp.getValueType() != ScalarVT) {
19199  assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19200  return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19201  }
19202  return InOp;
19203  }
19204 
19205  // extract_vector_elt of out-of-bounds element -> UNDEF
19206  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19207  if (IndexC && VecVT.isFixedLengthVector() &&
19208  IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19209  return DAG.getUNDEF(ScalarVT);
19210 
19211  // extract_vector_elt (build_vector x, y), 1 -> y
// For SPLAT_VECTOR any lane works, so operand 0 is used regardless of index.
19212  if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19213  VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19214  TLI.isTypeLegal(VecVT) &&
19215  (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19216  assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19217  VecVT.isFixedLengthVector()) &&
19218  "BUILD_VECTOR used for scalable vectors");
19219  unsigned IndexVal =
19220  VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19221  SDValue Elt = VecOp.getOperand(IndexVal);
19222  EVT InEltVT = Elt.getValueType();
19223 
19224  // Sometimes build_vector's scalar input types do not match result type.
19225  if (ScalarVT == InEltVT)
19226  return Elt;
19227 
19228  // TODO: It may be useful to truncate if free if the build_vector implicitly
19229  // converts.
19230  }
19231 
19232  if (VecVT.isScalableVector())
19233  return SDValue();
19234 
19235  // All the code from this point onwards assumes fixed width vectors, but it's
19236  // possible that some of the combinations could be made to work for scalable
19237  // vectors too.
19238  unsigned NumElts = VecVT.getVectorNumElements();
19239  unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19240 
19241  // TODO: These transforms should not require the 'hasOneUse' restriction, but
19242  // there are regressions on multiple targets without it. We can end up with a
19243  // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19244  if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19245  VecOp.hasOneUse()) {
19246  // The vector index of the LSBs of the source depend on the endian-ness.
19247  bool IsLE = DAG.getDataLayout().isLittleEndian();
19248  unsigned ExtractIndex = IndexC->getZExtValue();
19249  // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19250  unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19251  SDValue BCSrc = VecOp.getOperand(0);
19252  if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19253  return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19254 
19255  if (LegalTypes && BCSrc.getValueType().isInteger() &&
19256  BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19257  // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19258  // trunc i64 X to i32
19259  SDValue X = BCSrc.getOperand(0);
19260  assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19261  "Extract element and scalar to vector can't change element type "
19262  "from FP to integer.");
19263  unsigned XBitWidth = X.getValueSizeInBits();
// Recompute the LSB lane relative to the scalar source width, which may be
// wider than the bitcast vector's element count implies.
19264  BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
19265 
19266  // An extract element return value type can be wider than its vector
19267  // operand element type. In that case, the high bits are undefined, so
19268  // it's possible that we may need to extend rather than truncate.
19269  if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
19270  assert(XBitWidth % VecEltBitWidth == 0 &&
19271  "Scalar bitwidth must be a multiple of vector element bitwidth");
19272  return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19273  }
19274  }
19275  }
19276 
19277  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19278  return BO;
19279 
19280  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
19281  // We only perform this optimization before the op legalization phase because
19282  // we may introduce new vector instructions which are not backed by TD
19283  // patterns. For example on AVX, extracting elements from a wide vector
19284  // without using extract_subvector. However, if we can find an underlying
19285  // scalar value, then we can always use that.
19286  if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19287  auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
19288  // Find the new index to extract from.
19289  int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19290 
19291  // Extracting an undef index is undef.
19292  if (OrigElt == -1)
19293  return DAG.getUNDEF(ScalarVT);
19294 
19295  // Select the right vector half to extract from.
19296  SDValue SVInVec;
19297  if (OrigElt < (int)NumElts) {
19298  SVInVec = VecOp.getOperand(0);
19299  } else {
19300  SVInVec = VecOp.getOperand(1);
19301  OrigElt -= NumElts;
19302  }
19303 
19304  if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19305  SDValue InOp = SVInVec.getOperand(OrigElt);
19306  if (InOp.getValueType() != ScalarVT) {
19307  assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19308  InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19309  }
19310 
19311  return InOp;
19312  }
19313 
19314  // FIXME: We should handle recursing on other vector shuffles and
19315  // scalar_to_vector here as well.
19316 
19317  if (!LegalOperations ||
19318  // FIXME: Should really be just isOperationLegalOrCustom.
// NOTE(review): the original source line 19319 (the first half of this
// disjunction, a TLI legality query) is missing from this extract.
19320  TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
19321  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
19322  DAG.getVectorIdxConstant(OrigElt, DL));
19323  }
19324  }
19325 
19326  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19327  // simplify it based on the (valid) extraction indices.
19328  if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19329  return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19330  Use->getOperand(0) == VecOp &&
19331  isa<ConstantSDNode>(Use->getOperand(1));
19332  })) {
// Collect the union of extracted lanes across all users, then try to shrink
// the producer to only those lanes/bits.
19333  APInt DemandedElts = APInt::getZero(NumElts);
19334  for (SDNode *Use : VecOp->uses()) {
19335  auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19336  if (CstElt->getAPIntValue().ult(NumElts))
19337  DemandedElts.setBit(CstElt->getZExtValue());
19338  }
19339  if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
19340  // We simplified the vector operand of this extract element. If this
19341  // extract is not dead, visit it again so it is folded properly.
19342  if (N->getOpcode() != ISD::DELETED_NODE)
19343  AddToWorklist(N);
19344  return SDValue(N, 0);
19345  }
19346  APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
19347  if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
19348  // We simplified the vector operand of this extract element. If this
19349  // extract is not dead, visit it again so it is folded properly.
19350  if (N->getOpcode() != ISD::DELETED_NODE)
19351  AddToWorklist(N);
19352  return SDValue(N, 0);
19353  }
19354  }
19355 
19356  // Everything under here is trying to match an extract of a loaded value.
19357  // If the result of load has to be truncated, then it's not necessarily
19358  // profitable.
19359  bool BCNumEltsChanged = false;
19360  EVT ExtVT = VecVT.getVectorElementType();
19361  EVT LVT = ExtVT;
19362  if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19363  return SDValue();
19364 
19365  if (VecOp.getOpcode() == ISD::BITCAST) {
19366  // Don't duplicate a load with other uses.
19367  if (!VecOp.hasOneUse())
19368  return SDValue();
19369 
19370  EVT BCVT = VecOp.getOperand(0).getValueType();
19371  if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19372  return SDValue();
// Element-count changes make shuffle masks below meaningless; remember it.
19373  if (NumElts != BCVT.getVectorNumElements())
19374  BCNumEltsChanged = true;
19375  VecOp = VecOp.getOperand(0);
19376  ExtVT = BCVT.getVectorElementType();
19377  }
19378 
19379  // extract (vector load $addr), i --> load $addr + i * size
// Variable-index case; the hasPredecessor check avoids creating a cycle when
// the index computation itself depends on the load.
19380  if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19381  ISD::isNormalLoad(VecOp.getNode()) &&
19382  !Index->hasPredecessor(VecOp.getNode())) {
19383  auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
19384  if (VecLoad && VecLoad->isSimple())
19385  return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19386  }
19387 
19388  // Perform only after legalization to ensure build_vector / vector_shuffle
19389  // optimizations have already been done.
19390  if (!LegalOperations || !IndexC)
19391  return SDValue();
19392 
19393  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19394  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19395  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19396  int Elt = IndexC->getZExtValue();
19397  LoadSDNode *LN0 = nullptr;
19398  if (ISD::isNormalLoad(VecOp.getNode())) {
19399  LN0 = cast<LoadSDNode>(VecOp);
19400  } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19401  VecOp.getOperand(0).getValueType() == ExtVT &&
19402  ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19403  // Don't duplicate a load with other uses.
19404  if (!VecOp.hasOneUse())
19405  return SDValue();
19406 
19407  LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19408  }
19409  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
19410  // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19411  // =>
19412  // (load $addr+1*size)
19413 
19414  // Don't duplicate a load with other uses.
19415  if (!VecOp.hasOneUse())
19416  return SDValue();
19417 
19418  // If the bit convert changed the number of elements, it is unsafe
19419  // to examine the mask.
19420  if (BCNumEltsChanged)
19421  return SDValue();
19422 
19423  // Select the input vector, guarding against out of range extract vector.
19424  int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19425  VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19426 
19427  if (VecOp.getOpcode() == ISD::BITCAST) {
19428  // Don't duplicate a load with other uses.
19429  if (!VecOp.hasOneUse())
19430  return SDValue();
19431 
19432  VecOp = VecOp.getOperand(0);
19433  }
19434  if (ISD::isNormalLoad(VecOp.getNode())) {
19435  LN0 = cast<LoadSDNode>(VecOp);
// Remap the lane into the selected shuffle input and rebuild the index.
19436  Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19437  Index = DAG.getConstant(Elt, DL, Index.getValueType());
19438  }
19439  } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19440  VecVT.getVectorElementType() == ScalarVT &&
19441  (!LegalTypes ||
19442  TLI.isTypeLegal(
19443  VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19444  // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19445  // -> extract_vector_elt a, 0
19446  // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19447  // -> extract_vector_elt a, 1
19448  // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19449  // -> extract_vector_elt b, 0
19450  // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19451  // -> extract_vector_elt b, 1
19452  SDLoc SL(N);
19453  EVT ConcatVT = VecOp.getOperand(0).getValueType();
19454  unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19455  SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19456  Index.getValueType());
19457 
19458  SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19459  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19460  ConcatVT.getVectorElementType(),
19461  ConcatOp, NewIdx);
19462  return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19463  }
19464 
19465  // Make sure we found a non-volatile load and the extractelement is
19466  // the only use.
19467  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19468  return SDValue();
19469 
19470  // If Idx was -1 above, Elt is going to be -1, so just return undef.
19471  if (Elt == -1)
19472  return DAG.getUNDEF(LVT);
19473 
19474  return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19475 }
19476 
19477 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
// Each operand must be an ANY_EXTEND or ZERO_EXTEND (or undef) from one common
// narrow scalar type; the node is rebuilt as a wider BUILD_VECTOR of the
// narrow sources (padded with undef for any_ext, zero for zext) and bitcast
// back to the original type. Returns the bitcast or an empty SDValue.
19478 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19479  // We perform this optimization post type-legalization because
19480  // the type-legalizer often scalarizes integer-promoted vectors.
19481  // Performing this optimization before may create bit-casts which
19482  // will be type-legalized to complex code sequences.
19483  // We perform this optimization only before the operation legalizer because we
19484  // may introduce illegal operations.
// NOTE(review): the original source line 19485 (the guard condition that the
// early-return below belongs to) is missing from this extract.
19486  return SDValue();
19487 
19488  unsigned NumInScalars = N->getNumOperands();
19489  SDLoc DL(N);
19490  EVT VT = N->getValueType(0);
19491 
19492  // Check to see if this is a BUILD_VECTOR of a bunch of values
19493  // which come from any_extend or zero_extend nodes. If so, we can create
19494  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19495  // optimizations. We do not handle sign-extend because we can't fill the sign
19496  // using shuffles.
19497  EVT SourceType = MVT::Other;
19498  bool AllAnyExt = true;
19499 
19500  for (unsigned i = 0; i != NumInScalars; ++i) {
19501  SDValue In = N->getOperand(i);
19502  // Ignore undef inputs.
19503  if (In.isUndef()) continue;
19504 
19505  bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
19506  bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19507 
19508  // Abort if the element is not an extension.
19509  if (!ZeroExt && !AnyExt) {
19510  SourceType = MVT::Other;
19511  break;
19512  }
19513 
19514  // The input is a ZeroExt or AnyExt. Check the original type.
19515  EVT InTy = In.getOperand(0).getValueType();
19516 
19517  // Check that all of the widened source types are the same.
19518  if (SourceType == MVT::Other)
19519  // First time.
19520  SourceType = InTy;
19521  else if (InTy != SourceType) {
19522  // Multiple income types. Abort.
19523  SourceType = MVT::Other;
19524  break;
19525  }
19526 
19527  // Check if all of the extends are ANY_EXTENDs.
19528  AllAnyExt &= AnyExt;
19529  }
19530 
19531  // In order to have valid types, all of the inputs must be extended from the
19532  // same source type and all of the inputs must be any or zero extend.
19533  // Scalar sizes must be a power of two.
19534  EVT OutScalarTy = VT.getScalarType();
19535  bool ValidTypes = SourceType != MVT::Other &&
19536  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19537  isPowerOf2_32(SourceType.getSizeInBits());
19538 
19539  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19540  // turn into a single shuffle instruction.
19541  if (!ValidTypes)
19542  return SDValue();
19543 
19544  // If we already have a splat buildvector, then don't fold it if it means
19545  // introducing zeros.
19546  if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19547  return SDValue();
19548 
19549  bool isLE = DAG.getDataLayout().isLittleEndian();
19550  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19551  assert(ElemRatio > 1 && "Invalid element size ratio");
// Padding lanes: undef is fine when every input was any_ext; otherwise they
// must be zero to preserve zext semantics.
19552  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19553  DAG.getConstant(0, DL, SourceType);
19554 
19555  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19556  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19557 
19558  // Populate the new build_vector
19559  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19560  SDValue Cast = N->getOperand(i);
19561  assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19562  Cast.getOpcode() == ISD::ZERO_EXTEND ||
19563  Cast.isUndef()) && "Invalid cast opcode");
19564  SDValue In;
19565  if (Cast.isUndef())
19566  In = DAG.getUNDEF(SourceType);
19567  else
19568  In = Cast->getOperand(0);
// The payload lane within each group of ElemRatio narrow lanes is the LSB
// position, which depends on endianness.
19569  unsigned Index = isLE ? (i * ElemRatio) :
19570  (i * ElemRatio + (ElemRatio - 1));
19571 
19572  assert(Index < Ops.size() && "Invalid index");
19573  Ops[Index] = In;
19574  }
19575 
19576  // The type of the new BUILD_VECTOR node.
19577  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19578  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19579  "Invalid vector size");
19580  // Check if the new vector type is legal.
19581  if (!isTypeLegal(VecVT) ||
19582  (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
// NOTE(review): the original source line 19583 (the second half of this
// legality condition) is missing from this extract.
19584  return SDValue();
19585 
19586  // Make the new BUILD_VECTOR.
19587  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19588 
19589  // The new BUILD_VECTOR node has the potential to be further optimized.
19590  AddToWorklist(BV.getNode());
19591  // Bitcast to the desired type.
19592  return DAG.getBitcast(VT, BV);
19593 }
19594 
19595 // Simplify (build_vec (trunc $1)
19596 // (trunc (srl $1 half-width))
19597 // (trunc (srl $1 (2 * half-width))) …)
19598 // to (bitcast $1)
19599 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19600  assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19601 
19602  // Only for little endian
19603  if (!DAG.getDataLayout().isLittleEndian())
19604  return SDValue();
19605 
19606  SDLoc DL(N);
19607  EVT VT = N->getValueType(0);
19608  EVT OutScalarTy = VT.getScalarType();
19609  uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19610 
19611  // Only for power of two types to be sure that bitcast works well
19612  if (!isPowerOf2_64(ScalarTypeBitsize))
19613  return SDValue();
19614 
19615  unsigned NumInScalars = N->getNumOperands();
19616 
19617  // Look through bitcasts
19618  auto PeekThroughBitcast = [](SDValue Op) {
19619  if (Op.getOpcode() == ISD::BITCAST)
19620  return Op.getOperand(0);
19621  return Op;
19622  };
19623 
19624  // The source value where all the parts are extracted.
19625  SDValue Src;
19626  for (unsigned i = 0; i != NumInScalars; ++i) {
19627  SDValue In = PeekThroughBitcast(N->getOperand(i));
19628  // Ignore undef inputs.
19629  if (In.isUndef()) continue;
19630 
19631  if (In.getOpcode() != ISD::TRUNCATE)
19632  return SDValue();
19633 
19634  In = PeekThroughBitcast(In.getOperand(0));
19635 
19636  if (In.getOpcode() != ISD::SRL) {
19637  // For now only build_vec without shuffling, handle shifts here in the
19638  // future.
19639  if (i != 0)
19640  return SDValue();
19641 
19642  Src = In;
19643  } else {
19644  // In is SRL
19645  SDValue part = PeekThroughBitcast(In.getOperand(0));
19646 
19647  if (!Src) {
19648  Src = part;
19649  } else if (Src != part) {
19650  // Vector parts do not stem from the same variable
19651  return SDValue();
19652  }
19653 
19654  SDValue ShiftAmtVal = In.getOperand(1);
19655  if (!isa<ConstantSDNode>(ShiftAmtVal))
19656  return SDValue();
19657 
19658  uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19659 
19660  // The extracted value is not extracted at the right position
19661  if (ShiftAmt != i * ScalarTypeBitsize)
19662  return SDValue();
19663  }
19664  }
19665 
19666  // Only cast if the size is the same
19667  if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19668  return SDValue();
19669 
19670  return DAG.getBitcast(VT, Src);
19671 }
19672 
19673 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19674  ArrayRef<int> VectorMask,
19675  SDValue VecIn1, SDValue VecIn2,
19676  unsigned LeftIdx, bool DidSplitVec) {
19677  SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19678 
19679  EVT VT = N->getValueType(0);
19680  EVT InVT1 = VecIn1.getValueType();
19681  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19682 
19683  unsigned NumElems = VT.getVectorNumElements();
19684  unsigned ShuffleNumElems = NumElems;
19685 
19686  // If we artificially split a vector in two already, then the offsets in the
19687  // operands will all be based off of VecIn1, even those in VecIn2.
19688  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19689 
19690  uint64_t VTSize = VT.getFixedSizeInBits();
19691  uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19692  uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19693 
19694  assert(InVT2Size <= InVT1Size &&
19695  "Inputs must be sorted to be in non-increasing vector size order.");
19696 
19697  // We can't generate a shuffle node with mismatched input and output types.
19698  // Try to make the types match the type of the output.
19699  if (InVT1 != VT || InVT2 != VT) {
19700  if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19701  // If the output vector length is a multiple of both input lengths,
19702  // we can concatenate them and pad the rest with undefs.
19703  unsigned NumConcats = VTSize / InVT1Size;
19704  assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19705  SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19706  ConcatOps[0] = VecIn1;
19707  ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19708  VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19709  VecIn2 = SDValue();
19710  } else if (InVT1Size == VTSize * 2) {
19711  if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19712  return SDValue();
19713 
19714  if (!VecIn2.getNode()) {
19715  // If we only have one input vector, and it's twice the size of the
19716  // output, split it in two.
19717  VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19718  DAG.getVectorIdxConstant(NumElems, DL));
19719  VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19720  // Since we now have shorter input vectors, adjust the offset of the
19721  // second vector's start.
19722  Vec2Offset = NumElems;
19723  } else {
19724  assert(InVT2Size <= InVT1Size &&
19725  "Second input is not going to be larger than the first one.");
19726 
19727  // VecIn1 is wider than the output, and we have another, possibly
19728  // smaller input. Pad the smaller input with undefs, shuffle at the
19729  // input vector width, and extract the output.
19730  // The shuffle type is different than VT, so check legality again.
19731  if (LegalOperations &&
19733  return SDValue();
19734 
19735  // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19736  // lower it back into a BUILD_VECTOR. So if the inserted type is
19737  // illegal, don't even try.
19738  if (InVT1 != InVT2) {
19739  if (!TLI.isTypeLegal(InVT2))
19740  return SDValue();
19741  VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19742  DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19743  }
19744  ShuffleNumElems = NumElems * 2;
19745  }
19746  } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19747  SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19748  ConcatOps[0] = VecIn2;
19749  VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19750  } else {
19751  // TODO: Support cases where the length mismatch isn't exactly by a
19752  // factor of 2.
19753  // TODO: Move this check upwards, so that if we have bad type
19754  // mismatches, we don't create any DAG nodes.
19755  return SDValue();
19756  }
19757  }
19758 
19759  // Initialize mask to undef.
19760  SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19761 
19762  // Only need to run up to the number of elements actually used, not the
19763  // total number of elements in the shuffle - if we are shuffling a wider
19764  // vector, the high lanes should be set to undef.
19765  for (unsigned i = 0; i != NumElems; ++i) {
19766  if (VectorMask[i] <= 0)
19767  continue;
19768 
19769  unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19770  if (VectorMask[i] == (int)LeftIdx) {
19771  Mask[i] = ExtIndex;
19772  } else if (VectorMask[i] == (int)LeftIdx + 1) {
19773  Mask[i] = Vec2Offset + ExtIndex;
19774  }
19775  }
19776 
19777  // The type the input vectors may have changed above.
19778  InVT1 = VecIn1.getValueType();
19779 
19780  // If we already have a VecIn2, it should have the same type as VecIn1.
19781  // If we don't, get an undef/zero vector of the appropriate type.
19782  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19783  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19784 
19785  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19786  if (ShuffleNumElems > NumElems)
19787  Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19788 
19789  return Shuffle;
19790 }
19791 
19793  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19794 
19795  // First, determine where the build vector is not undef.
19796  // TODO: We could extend this to handle zero elements as well as undefs.
19797  int NumBVOps = BV->getNumOperands();
19798  int ZextElt = -1;
19799  for (int i = 0; i != NumBVOps; ++i) {
19800  SDValue Op = BV->getOperand(i);
19801  if (Op.isUndef())
19802  continue;
19803  if (ZextElt == -1)
19804  ZextElt = i;
19805  else
19806  return SDValue();
19807  }
19808  // Bail out if there's no non-undef element.
19809  if (ZextElt == -1)
19810  return SDValue();
19811 
19812  // The build vector contains some number of undef elements and exactly
19813  // one other element. That other element must be a zero-extended scalar
19814  // extracted from a vector at a constant index to turn this into a shuffle.
19815  // Also, require that the build vector does not implicitly truncate/extend
19816  // its elements.
19817  // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19818  EVT VT = BV->getValueType(0);
19819  SDValue Zext = BV->getOperand(ZextElt);
19820  if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19822  !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19823  Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19824  return SDValue();
19825 
19826  // The zero-extend must be a multiple of the source size, and we must be
19827  // building a vector of the same size as the source of the extract element.
19828  SDValue Extract = Zext.getOperand(0);
19829  unsigned DestSize = Zext.getValueSizeInBits();
19830  unsigned SrcSize = Extract.getValueSizeInBits();
19831  if (DestSize % SrcSize != 0 ||
19832  Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19833  return SDValue();
19834 
19835  // Create a shuffle mask that will combine the extracted element with zeros
19836  // and undefs.
19837  int ZextRatio = DestSize / SrcSize;
19838  int NumMaskElts = NumBVOps * ZextRatio;
19839  SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19840  for (int i = 0; i != NumMaskElts; ++i) {
19841  if (i / ZextRatio == ZextElt) {
19842  // The low bits of the (potentially translated) extracted element map to
19843  // the source vector. The high bits map to zero. We will use a zero vector
19844  // as the 2nd source operand of the shuffle, so use the 1st element of
19845  // that vector (mask value is number-of-elements) for the high bits.
19846  if (i % ZextRatio == 0)
19847  ShufMask[i] = Extract.getConstantOperandVal(1);
19848  else
19849  ShufMask[i] = NumMaskElts;
19850  }
19851 
19852  // Undef elements of the build vector remain undef because we initialize
19853  // the shuffle mask with -1.
19854  }
19855 
19856  // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19857  // bitcast (shuffle V, ZeroVec, VectorMask)
19858  SDLoc DL(BV);
19859  EVT VecVT = Extract.getOperand(0).getValueType();
19860  SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19861  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19862  SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19863  ZeroVec, ShufMask, DAG);
19864  if (!Shuf)
19865  return SDValue();
19866  return DAG.getBitcast(VT, Shuf);
19867 }
19868 
19869 // FIXME: promote to STLExtras.
/// Return the index of the first occurrence of \p Val in \p Range, or -1
/// (expressed in the range's difference type) when it is not present.
template <typename R, typename T>
static auto getFirstIndexOf(R &&Range, const T &Val) {
  auto It = find(Range, Val);
  using DiffT = decltype(std::distance(Range.begin(), It));
  if (It != Range.end())
    return std::distance(Range.begin(), It);
  return static_cast<DiffT>(-1);
}
19877 
19878 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19879 // operations. If the types of the vectors we're extracting from allow it,
19880 // turn this into a vector_shuffle node.
19881 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19882  SDLoc DL(N);
19883  EVT VT = N->getValueType(0);
19884 
19885  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19886  if (!isTypeLegal(VT))
19887  return SDValue();
19888 
19890  return V;
19891 
19892  // May only combine to shuffle after legalize if shuffle is legal.
19893  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19894  return SDValue();
19895 
19896  bool UsesZeroVector = false;
19897  unsigned NumElems = N->getNumOperands();
19898 
19899  // Record, for each element of the newly built vector, which input vector
19900  // that element comes from. -1 stands for undef, 0 for the zero vector,
19901  // and positive values for the input vectors.
19902  // VectorMask maps each element to its vector number, and VecIn maps vector
19903  // numbers to their initial SDValues.
19904 
19905  SmallVector<int, 8> VectorMask(NumElems, -1);
19907  VecIn.push_back(SDValue());
19908 
19909  for (unsigned i = 0; i != NumElems; ++i) {
19910  SDValue Op = N->getOperand(i);
19911 
19912  if (Op.isUndef())
19913  continue;
19914 
19915  // See if we can use a blend with a zero vector.
19916  // TODO: Should we generalize this to a blend with an arbitrary constant
19917  // vector?
19918  if (isNullConstant(Op) || isNullFPConstant(Op)) {
19919  UsesZeroVector = true;
19920  VectorMask[i] = 0;
19921  continue;
19922  }
19923 
19924  // Not an undef or zero. If the input is something other than an
19925  // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19926  if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19927  !isa<ConstantSDNode>(Op.getOperand(1)))
19928  return SDValue();
19929  SDValue ExtractedFromVec = Op.getOperand(0);
19930 
19931  if (ExtractedFromVec.getValueType().isScalableVector())
19932  return SDValue();
19933 
19934  const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19935  if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19936  return SDValue();
19937 
19938  // All inputs must have the same element type as the output.
19939  if (VT.getVectorElementType() !=
19940  ExtractedFromVec.getValueType().getVectorElementType())
19941  return SDValue();
19942 
19943  // Have we seen this input vector before?
19944  // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19945  // a map back from SDValues to numbers isn't worth it.
19946  int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19947  if (Idx == -1) { // A new source vector?
19948  Idx = VecIn.size();
19949  VecIn.push_back(ExtractedFromVec);
19950  }
19951 
19952  VectorMask[i] = Idx;
19953  }
19954 
19955  // If we didn't find at least one input vector, bail out.
19956  if (VecIn.size() < 2)
19957  return SDValue();
19958 
19959  // If all the Operands of BUILD_VECTOR extract from same
19960  // vector, then split the vector efficiently based on the maximum
19961  // vector access index and adjust the VectorMask and
19962  // VecIn accordingly.
19963  bool DidSplitVec = false;
19964  if (VecIn.size() == 2) {
19965  unsigned MaxIndex = 0;
19966  unsigned NearestPow2 = 0;
19967  SDValue Vec = VecIn.back();
19968  EVT InVT = Vec.getValueType();
19969  SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19970 
19971  for (unsigned i = 0; i < NumElems; i++) {
19972  if (VectorMask[i] <= 0)
19973  continue;
19974  unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19975  IndexVec[i] = Index;
19976  MaxIndex = std::max(MaxIndex, Index);
19977  }
19978 
19979  NearestPow2 = PowerOf2Ceil(MaxIndex);
19980  if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19981  NumElems * 2 < NearestPow2) {
19982  unsigned SplitSize = NearestPow2 / 2;
19983  EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19984  InVT.getVectorElementType(), SplitSize);
19985  if (TLI.isTypeLegal(SplitVT) &&
19986  SplitSize + SplitVT.getVectorNumElements() <=
19987  InVT.getVectorNumElements()) {
19988  SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19989  DAG.getVectorIdxConstant(SplitSize, DL));
19990  SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19991  DAG.getVectorIdxConstant(0, DL));
19992  VecIn.pop_back();
19993  VecIn.push_back(VecIn1);
19994  VecIn.push_back(VecIn2);
19995  DidSplitVec = true;
19996 
19997  for (unsigned i = 0; i < NumElems; i++) {
19998  if (VectorMask[i] <= 0)
19999  continue;
20000  VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
20001  }
20002  }
20003  }
20004  }
20005 
20006  // Sort input vectors by decreasing vector element count,
20007  // while preserving the relative order of equally-sized vectors.
20008  // Note that we keep the first "implicit zero vector as-is.
20009  SmallVector<SDValue, 8> SortedVecIn(VecIn);
20010  llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
20011  [](const SDValue &a, const SDValue &b) {
20012  return a.getValueType().getVectorNumElements() >
20014  });
20015 
20016  // We now also need to rebuild the VectorMask, because it referenced element
20017  // order in VecIn, and we just sorted them.
20018  for (int &SourceVectorIndex : VectorMask) {
20019  if (SourceVectorIndex <= 0)
20020  continue;
20021  unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
20022  assert(Idx > 0 && Idx < SortedVecIn.size() &&
20023  VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
20024  SourceVectorIndex = Idx;
20025  }
20026 
20027  VecIn = std::move(SortedVecIn);
20028 
20029  // TODO: Should this fire if some of the input vectors has illegal type (like
20030  // it does now), or should we let legalization run its course first?
20031 
20032  // Shuffle phase:
20033  // Take pairs of vectors, and shuffle them so that the result has elements
20034  // from these vectors in the correct places.
20035  // For example, given:
20036  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
20037  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
20038  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
20039  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
20040  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
20041  // We will generate:
20042  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
20043  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
20044  SmallVector<SDValue, 4> Shuffles;
20045  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
20046  unsigned LeftIdx = 2 * In + 1;
20047  SDValue VecLeft = VecIn[LeftIdx];
20048  SDValue VecRight =
20049  (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
20050 
20051  if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
20052  VecRight, LeftIdx, DidSplitVec))
20053  Shuffles.push_back(Shuffle);
20054  else
20055  return SDValue();
20056  }
20057 
20058  // If we need the zero vector as an "ingredient" in the blend tree, add it
20059  // to the list of shuffles.
20060  if (UsesZeroVector)
20061  Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
20062  : DAG.getConstantFP(0.0, DL, VT));
20063 
20064  // If we only have one shuffle, we're done.
20065  if (Shuffles.size() == 1)
20066  return Shuffles[0];
20067 
20068  // Update the vector mask to point to the post-shuffle vectors.
20069  for (int &Vec : VectorMask)
20070  if (Vec == 0)
20071  Vec = Shuffles.size() - 1;
20072  else
20073  Vec = (Vec - 1) / 2;
20074 
20075  // More than one shuffle. Generate a binary tree of blends, e.g. if from
20076  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
20077  // generate:
20078  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
20079  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
20080  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
20081  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
20082  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
20083  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
20084  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
20085 
20086  // Make sure the initial size of the shuffle list is even.
20087  if (Shuffles.size() % 2)
20088  Shuffles.push_back(DAG.getUNDEF(VT));
20089 
20090  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
20091  if (CurSize % 2) {
20092  Shuffles[CurSize] = DAG.getUNDEF(VT);
20093  CurSize++;
20094  }
20095  for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
20096  int Left = 2 * In;
20097  int Right = 2 * In + 1;
20098  SmallVector<int, 8> Mask(NumElems, -1);
20099  for (unsigned i = 0; i != NumElems; ++i) {
20100  if (VectorMask[i] == Left) {
20101  Mask[i] = i;
20102  VectorMask[i] = In;
20103  } else if (VectorMask[i] == Right) {
20104  Mask[i] = i + NumElems;
20105  VectorMask[i] = In;
20106  }
20107  }
20108 
20109  Shuffles[In] =
20110  DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
20111  }
20112  }
20113  return Shuffles[0];
20114 }
20115 
20116 // Try to turn a build vector of zero extends of extract vector elts into a
20117 // a vector zero extend and possibly an extract subvector.
20118 // TODO: Support sign extend?
20119 // TODO: Allow undef elements?
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  // Don't run after operation legalization: the wide extend created below
  // might not be legal then.
  if (LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Set if any element was produced by ZERO_EXTEND (rather than ANY_EXTEND);
  // used at the end to pick the opcode of the combined extend.
  bool FoundZeroExtend = false;
  SDValue Op0 = N->getOperand(0);
  // Returns the constant extract index when Op matches
  // (zext/aext (extract_vector_elt Src, C)) with the same Src vector as the
  // first operand of this build_vector, or -1 otherwise. Short-circuit order
  // matters: Op0's operands are only touched once Op's shape is confirmed.
  auto checkElem = [&](SDValue Op) -> int64_t {
    unsigned Opc = Op.getOpcode();
    FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
    if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
    return -1;
  };

  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  // Offset must be a constant multiple of the
  // known-minimum vector length of the result type.
  int64_t Offset = checkElem(Op0);
  if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
    return SDValue();

  unsigned NumElems = N->getNumOperands();
  // The common source vector all elements are extracted from.
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);

  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
    return SDValue();

  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
      return SDValue();
  }

  SDLoc DL(N);
  // Grab the adjacent source elements as one subvector (starting at the first
  // element's index), then widen with a single vector extend. ZERO_EXTEND is
  // required if any element used it; otherwise ANY_EXTEND suffices.
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
                     VT, In);
}
20168 
20169 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20170  EVT VT = N->getValueType(0);
20171 
20172  // A vector built entirely of undefs is undef.
20173  if (ISD::allOperandsUndef(N))
20174  return DAG.getUNDEF(VT);
20175 
20176  // If this is a splat of a bitcast from another vector, change to a
20177  // concat_vector.
20178  // For example:
20179  // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20180  // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20181  //
20182  // If X is a build_vector itself, the concat can become a larger build_vector.
20183  // TODO: Maybe this is useful for non-splat too?
20184  if (!LegalOperations) {
20185  if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20186  Splat = peekThroughBitcasts(Splat);
20187  EVT SrcVT = Splat.getValueType();
20188  if (SrcVT.isVector()) {
20189  unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20190  EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
20191  SrcVT.getVectorElementType(), NumElts);
20192  if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20193  SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20195  NewVT, Ops);
20196  return DAG.getBitcast(VT, Concat);
20197  }
20198  }
20199  }
20200  }
20201 
20202  // Check if we can express BUILD VECTOR via subvector extract.
20203  if (!LegalTypes && (N->getNumOperands() > 1)) {
20204  SDValue Op0 = N->getOperand(0);
20205  auto checkElem = [&](SDValue Op) -> uint64_t {
20206  if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
20207  (Op0.getOperand(0) == Op.getOperand(0)))
20208  if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
20209  return CNode->getZExtValue();
20210  return -1;
20211  };
20212 
20213  int Offset = checkElem(Op0);
20214  for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20215  if (Offset + i != checkElem(N->getOperand(i))) {
20216  Offset = -1;
20217  break;
20218  }
20219  }
20220 
20221  if ((Offset == 0) &&
20222  (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20223  return Op0.getOperand(0);
20224  if ((Offset != -1) &&
20225  ((Offset % N->getValueType(0).getVectorNumElements()) ==
20226  0)) // IDX must be multiple of output size.
20227  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20228  Op0.getOperand(0), Op0.getOperand(1));
20229  }
20230 
20231  if (SDValue V = convertBuildVecZextToZext(N))
20232  return V;
20233 
20234  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
20235  return V;
20236 
20237  if (SDValue V = reduceBuildVecTruncToBitCast(N))
20238  return V;
20239 
20240  if (SDValue V = reduceBuildVecToShuffle(N))
20241  return V;
20242 
20243  // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20244  // Do this late as some of the above may replace the splat.
20246  if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20247  assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20248  return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
20249  }
20250 
20251  return SDValue();
20252 }
20253 
20255  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20256  EVT OpVT = N->getOperand(0).getValueType();
20257 
20258  // If the operands are legal vectors, leave them alone.
20259  if (TLI.isTypeLegal(OpVT))
20260  return SDValue();
20261 
20262  SDLoc DL(N);
20263  EVT VT = N->getValueType(0);
20265 
20266  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20267  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20268 
20269  // Keep track of what we encounter.
20270  bool AnyInteger = false;
20271  bool AnyFP = false;
20272  for (const SDValue &Op : N->ops()) {
20273  if (ISD::BITCAST == Op.getOpcode() &&
20274  !Op.getOperand(0).getValueType().isVector())
20275  Ops.push_back(Op.getOperand(0));
20276  else if (ISD::UNDEF == Op.getOpcode())
20277  Ops.push_back(ScalarUndef);
20278  else
20279  return SDValue();
20280 
20281  // Note whether we encounter an integer or floating point scalar.
20282  // If it's neither, bail out, it could be something weird like x86mmx.
20283  EVT LastOpVT = Ops.back().getValueType();
20284  if (LastOpVT.isFloatingPoint())
20285  AnyFP = true;
20286  else if (LastOpVT.isInteger())
20287  AnyInteger = true;
20288  else
20289  return SDValue();
20290  }
20291 
20292  // If any of the operands is a floating point scalar bitcast to a vector,
20293  // use floating point types throughout, and bitcast everything.
20294  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
20295  if (AnyFP) {
20296  SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
20297  ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20298  if (AnyInteger) {
20299  for (SDValue &Op : Ops) {
20300  if (Op.getValueType() == SVT)
20301  continue;
20302  if (Op.isUndef())
20303  Op = ScalarUndef;
20304  else
20305  Op = DAG.getBitcast(SVT, Op);
20306  }
20307  }
20308  }
20309 
20310  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20311  VT.getSizeInBits() / SVT.getSizeInBits());
20312  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20313 }
20314 
20315 // Attempt to merge nested concat_vectors/undefs.
20316 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20317 // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
20319  SelectionDAG &DAG) {
20320  EVT VT = N->getValueType(0);
20321 
20322  // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20323  EVT SubVT;
20324  SDValue FirstConcat;
20325  for (const SDValue &Op : N->ops()) {
20326  if (Op.isUndef())
20327  continue;
20328  if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20329  return SDValue();
20330  if (!FirstConcat) {
20331  SubVT = Op.getOperand(0).getValueType();
20332  if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
20333  return SDValue();
20334  FirstConcat = Op;
20335  continue;
20336  }
20337  if (SubVT != Op.getOperand(0).getValueType())
20338  return SDValue();
20339  }
20340  assert(FirstConcat && "Concat of all-undefs found");
20341 
20342  SmallVector<SDValue> ConcatOps;
20343  for (const SDValue &Op : N->ops()) {
20344  if (Op.isUndef()) {
20345  ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20346  continue;
20347  }
20348  ConcatOps.append(Op->op_begin(), Op->op_end());
20349  }
20350  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20351 }
20352 
20353 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20354 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20355 // most two distinct vectors the same size as the result, attempt to turn this
20356 // into a legal shuffle.
20358  EVT VT = N->getValueType(0);
20359  EVT OpVT = N->getOperand(0).getValueType();
20360 
20361  // We currently can't generate an appropriate shuffle for a scalable vector.
20362  if (VT.isScalableVector())
20363  return SDValue();
20364 
20365  int NumElts = VT.getVectorNumElements();
20366  int NumOpElts = OpVT.getVectorNumElements();
20367 
20368  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20370 
20371  for (SDValue Op : N->ops()) {
20373 
20374  // UNDEF nodes convert to UNDEF shuffle mask values.
20375  if (Op.isUndef()) {
20376  Mask.append((unsigned)NumOpElts, -1);
20377  continue;
20378  }
20379 
20380  if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20381  return SDValue();
20382 
20383  // What vector are we extracting the subvector from and at what index?
20384  SDValue ExtVec = Op.getOperand(0);
20385  int ExtIdx = Op.getConstantOperandVal(1);
20386 
20387  // We want the EVT of the original extraction to correctly scale the
20388  // extraction index.
20389  EVT ExtVT = ExtVec.getValueType();
20390  ExtVec = peekThroughBitcasts(ExtVec);
20391 
20392  // UNDEF nodes convert to UNDEF shuffle mask values.
20393  if (ExtVec.isUndef()) {
20394  Mask.append((unsigned)NumOpElts, -1);
20395  continue;
20396  }
20397 
20398  // Ensure that we are extracting a subvector from a vector the same
20399  // size as the result.
20400  if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20401  return SDValue();
20402 
20403  // Scale the subvector index to account for any bitcast.
20404  int NumExtElts = ExtVT.getVectorNumElements();
20405  if (0 == (NumExtElts % NumElts))
20406  ExtIdx /= (NumExtElts / NumElts);
20407  else if (0 == (NumElts % NumExtElts))
20408  ExtIdx *= (NumElts / NumExtElts);
20409  else
20410  return SDValue();
20411 
20412  // At most we can reference 2 inputs in the final shuffle.
20413  if (SV0.isUndef() || SV0 == ExtVec) {
20414  SV0 = ExtVec;
20415  for (int i = 0; i != NumOpElts; ++i)
20416  Mask.push_back(i + ExtIdx);
20417  } else if (SV1.isUndef() || SV1 == ExtVec) {
20418  SV1 = ExtVec;
20419  for (int i = 0; i != NumOpElts; ++i)
20420  Mask.push_back(i + ExtIdx + NumElts);
20421  } else {
20422  return SDValue();
20423  }
20424  }
20425 
20426  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20427  return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20428  DAG.getBitcast(VT, SV1), Mask, DAG);
20429 }
20430 
20432  unsigned CastOpcode = N->getOperand(0).getOpcode();
20433  switch (CastOpcode) {
20434  case ISD::SINT_TO_FP:
20435  case ISD::UINT_TO_FP:
20436  case ISD::FP_TO_SINT:
20437  case ISD::FP_TO_UINT:
20438  // TODO: Allow more opcodes?
20439  // case ISD::BITCAST:
20440  // case ISD::TRUNCATE:
20441  // case ISD::ZERO_EXTEND:
20442  // case ISD::SIGN_EXTEND:
20443  // case ISD::FP_EXTEND:
20444  break;
20445  default:
20446  return SDValue();
20447  }
20448 
20449  EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20450  if (!SrcVT.isVector())
20451  return SDValue();
20452 
20453  // All operands of the concat must be the same kind of cast from the same
20454  // source type.
20455  SmallVector<SDValue, 4> SrcOps;
20456  for (SDValue Op : N->ops()) {
20457  if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20458  Op.getOperand(0).getValueType() != SrcVT)
20459  return SDValue();
20460  SrcOps.push_back(Op.getOperand(0));
20461  }
20462 
20463  // The wider cast must be supported by the target. This is unusual because
20464  // the operation support type parameter depends on the opcode. In addition,
20465  // check the other type in the cast to make sure this is really legal.
20466  EVT VT = N->getValueType(0);
20467  EVT SrcEltVT = SrcVT.getVectorElementType();
20468  ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20469  EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20470  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20471  switch (CastOpcode) {
20472  case ISD::SINT_TO_FP:
20473  case ISD::UINT_TO_FP:
20474  if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20475  !TLI.isTypeLegal(VT))
20476  return SDValue();
20477  break;
20478  case ISD::FP_TO_SINT:
20479  case ISD::FP_TO_UINT:
20480  if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20481  !TLI.isTypeLegal(ConcatSrcVT))
20482  return SDValue();
20483  break;
20484  default:
20485  llvm_unreachable("Unexpected cast opcode");
20486  }
20487 
20488  // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20489  SDLoc DL(N);
20490  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20491  return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20492 }
20493 
20494 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20495  // If we only have one input vector, we don't need to do any concatenation.
20496  if (N->getNumOperands() == 1)
20497  return N->getOperand(0);
20498 
20499  // Check if all of the operands are undefs.
20500  EVT VT = N->getValueType(0);
20501  if (ISD::allOperandsUndef(N))
20502  return DAG.getUNDEF(VT);
20503 
20504  // Optimize concat_vectors where all but the first of the vectors are undef.
20505  if (all_of(drop_begin(N->ops()),
20506  [](const SDValue &Op) { return Op.isUndef(); })) {
20507  SDValue In = N->getOperand(0);
20508  assert(In.getValueType().isVector() && "Must concat vectors");
20509 
20510  // If the input is a concat_vectors, just make a larger concat by padding
20511  // with smaller undefs.
20512  if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20513  unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20514  SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20515  Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20516  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20517  }
20518 
20520 
20521  // concat_vectors(scalar_to_vector(scalar), undef) ->
20522  // scalar_to_vector(scalar)
20523  if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20524  Scalar.hasOneUse()) {
20525  EVT SVT = Scalar.getValueType().getVectorElementType();
20526  if (SVT == Scalar.getOperand(0).getValueType())
20527  Scalar = Scalar.getOperand(0);
20528  }
20529 
20530  // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20531  if (!Scalar.getValueType().isVector()) {
20532  // If the bitcast type isn't legal, it might be a trunc of a legal type;
20533  // look through the trunc so we can still do the transform:
20534  // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20535  if (Scalar->getOpcode() == ISD::TRUNCATE &&
20536  !TLI.isTypeLegal(Scalar.getValueType()) &&
20537  TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20538  Scalar = Scalar->getOperand(0);
20539 
20540  EVT SclTy = Scalar.getValueType();
20541 
20542  if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20543  return SDValue();
20544 
20545  // Bail out if the vector size is not a multiple of the scalar size.
20546  if (VT.getSizeInBits() % SclTy.getSizeInBits())
20547  return SDValue();
20548 
20549  unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20550  if (VNTNumElms < 2)
20551  return SDValue();
20552 
20553  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20554  if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20555  return SDValue();
20556 
20557  SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20558  return DAG.getBitcast(VT, Res);
20559  }
20560  }
20561 
20562  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20563  // We have already tested above for an UNDEF only concatenation.
20564  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20565  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20566  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20567  return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20568  };
20569  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20571  EVT SVT = VT.getScalarType();
20572 
20573  EVT MinVT = SVT;
20574  if (!SVT.isFloatingPoint()) {
20575  // If BUILD_VECTOR are from built from integer, they may have different
20576  // operand types. Get the smallest type and truncate all operands to it.
20577  bool FoundMinVT = false;
20578  for (const SDValue &Op : N->ops())
20579  if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20580  EVT OpSVT = Op.getOperand(0).getValueType();
20581  MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20582  FoundMinVT = true;
20583  }
20584  assert(FoundMinVT && "Concat vector type mismatch");
20585  }
20586 
20587  for (const SDValue &Op : N->ops()) {
20588  EVT OpVT = Op.getValueType();
20589  unsigned NumElts = OpVT.getVectorNumElements();
20590 
20591  if (ISD::UNDEF == Op.getOpcode())
20592  Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20593 
20594  if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20595  if (SVT.isFloatingPoint()) {
20596  assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20597  Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20598  } else {
20599  for (unsigned i = 0; i != NumElts; ++i)
20600  Opnds.push_back(
20601  DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20602  }
20603  }
20604  }
20605 
20606  assert(VT.getVectorNumElements() == Opnds.size() &&
20607  "Concat vector type mismatch");
20608  return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20609  }
20610 
20611  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20612  // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20613  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20614  return V;
20615 
20616  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20617  // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20619  return V;
20620 
20621  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20622  if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20623  return V;
20624  }
20625 
20626  if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20627  return V;
20628 
20629  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20630  // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20631  // operands and look for a CONCAT operations that place the incoming vectors
20632  // at the exact same location.
20633  //
20634  // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20635  SDValue SingleSource = SDValue();
20636  unsigned PartNumElem =
20637  N->getOperand(0).getValueType().getVectorMinNumElements();
20638 
20639  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20640  SDValue Op = N->getOperand(i);
20641 
20642  if (Op.isUndef())
20643  continue;
20644 
20645  // Check if this is the identity extract:
20646  if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20647  return SDValue();
20648 
20649  // Find the single incoming vector for the extract_subvector.
20650  if (SingleSource.getNode()) {
20651  if (Op.getOperand(0) != SingleSource)
20652  return SDValue();
20653  } else {
20654  SingleSource = Op.getOperand(0);
20655 
20656  // Check the source type is the same as the type of the result.
20657  // If not, this concat may extend the vector, so we can not
20658  // optimize it away.
20659  if (SingleSource.getValueType() != N->getValueType(0))
20660  return SDValue();
20661  }
20662 
20663  // Check that we are reading from the identity index.
20664  unsigned IdentityIndex = i * PartNumElem;
20665  if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20666  return SDValue();
20667  }
20668 
20669  if (SingleSource.getNode())
20670  return SingleSource;
20671 
20672  return SDValue();
20673 }
20674 
20675 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20676 // if the subvector can be sourced for free.
20678  if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20679  V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20680  return V.getOperand(1);
20681  }
20682  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20683  if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20684  V.getOperand(0).getValueType() == SubVT &&
20685  (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20686  uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20687  return V.getOperand(SubIdx);
20688  }
20689  return SDValue();
20690 }
20691 
20693  SelectionDAG &DAG,
20694  bool LegalOperations) {
20695  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20696  SDValue BinOp = Extract->getOperand(0);
20697  unsigned BinOpcode = BinOp.getOpcode();
20698  if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20699  return SDValue();
20700 
20701  EVT VecVT = BinOp.getValueType();
20702  SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20703  if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20704  return SDValue();
20705 
20706  SDValue Index = Extract->getOperand(1);
20707  EVT SubVT = Extract->getValueType(0);
20708  if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20709  return SDValue();
20710 
20711  SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20712  SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20713 
20714  // TODO: We could handle the case where only 1 operand is being inserted by
20715  // creating an extract of the other operand, but that requires checking
20716  // number of uses and/or costs.
20717  if (!Sub0 || !Sub1)
20718  return SDValue();
20719 
20720  // We are inserting both operands of the wide binop only to extract back
20721  // to the narrow vector size. Eliminate all of the insert/extract:
20722  // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20723  return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20724  BinOp->getFlags());
20725 }
20726 
20727 /// If we are extracting a subvector produced by a wide binary operator try
20728 /// to use a narrow binary operator and/or avoid concatenation and extraction.
20730  bool LegalOperations) {
20731  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20732  // some of these bailouts with other transforms.
20733 
20734  if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20735  return V;
20736 
20737  // The extract index must be a constant, so we can map it to a concat operand.
20738  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20739  if (!ExtractIndexC)
20740  return SDValue();
20741 
20742  // We are looking for an optionally bitcasted wide vector binary operator
20743  // feeding an extract subvector.
20744  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20745  SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20746  unsigned BOpcode = BinOp.getOpcode();
20747  if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20748  return SDValue();
20749 
20750  // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20751  // reduced to the unary fneg when it is visited, and we probably want to deal
20752  // with fneg in a target-specific way.
20753  if (BOpcode == ISD::FSUB) {
20754  auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20755  if (C && C->getValueAPF().isNegZero())
20756  return SDValue();
20757  }
20758 
20759  // The binop must be a vector type, so we can extract some fraction of it.
20760  EVT WideBVT = BinOp.getValueType();
20761  // The optimisations below currently assume we are dealing with fixed length
20762  // vectors. It is possible to add support for scalable vectors, but at the
20763  // moment we've done no analysis to prove whether they are profitable or not.
20764  if (!WideBVT.isFixedLengthVector())
20765  return SDValue();
20766 
20767  EVT VT = Extract->getValueType(0);
20768  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20769  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20770  "Extract index is not a multiple of the vector length.");
20771 
20772  // Bail out if this is not a proper multiple width extraction.
20773  unsigned WideWidth = WideBVT.getSizeInBits();
20774  unsigned NarrowWidth = VT.getSizeInBits();
20775  if (WideWidth % NarrowWidth != 0)
20776  return SDValue();
20777 
20778  // Bail out if we are extracting a fraction of a single operation. This can
20779  // occur because we potentially looked through a bitcast of the binop.
20780  unsigned NarrowingRatio = WideWidth / NarrowWidth;
20781  unsigned WideNumElts = WideBVT.getVectorNumElements();
20782  if (WideNumElts % NarrowingRatio != 0)
20783  return SDValue();
20784 
20785  // Bail out if the target does not support a narrower version of the binop.
20786  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20787  WideNumElts / NarrowingRatio);
20788  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20789  return SDValue();
20790 
20791  // If extraction is cheap, we don't need to look at the binop operands
20792  // for concat ops. The narrow binop alone makes this transform profitable.
20793  // We can't just reuse the original extract index operand because we may have
20794  // bitcasted.
20795  unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20796  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20797  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20798  BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20799  // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20800  SDLoc DL(Extract);
20801  SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20802  SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20803  BinOp.getOperand(0), NewExtIndex);
20804  SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20805  BinOp.getOperand(1), NewExtIndex);
20806  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20807  BinOp.getNode()->getFlags());
20808  return DAG.getBitcast(VT, NarrowBinOp);
20809  }
20810 
20811  // Only handle the case where we are doubling and then halving. A larger ratio
20812  // may require more than two narrow binops to replace the wide binop.
20813  if (NarrowingRatio != 2)
20814  return SDValue();
20815 
20816  // TODO: The motivating case for this transform is an x86 AVX1 target. That
20817  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20818  // flavors, but no other 256-bit integer support. This could be extended to
20819  // handle any binop, but that may require fixing/adding other folds to avoid
20820  // codegen regressions.
20821  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20822  return SDValue();
20823 
20824  // We need at least one concatenation operation of a binop operand to make
20825  // this transform worthwhile. The concat must double the input vector sizes.
20826  auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20827  if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20828  return V.getOperand(ConcatOpNum);
20829  return SDValue();
20830  };
20831  SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20832  SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20833 
20834  if (SubVecL || SubVecR) {
20835  // If a binop operand was not the result of a concat, we must extract a
20836  // half-sized operand for our new narrow binop:
20837  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20838  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20839  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20840  SDLoc DL(Extract);
20841  SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20842  SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20843  : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20844  BinOp.getOperand(0), IndexC);
20845 
20846  SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20847  : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20848  BinOp.getOperand(1), IndexC);
20849 
20850  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20851  return DAG.getBitcast(VT, NarrowBinOp);
20852  }
20853 
20854  return SDValue();
20855 }
20856 
20857 /// If we are extracting a subvector from a wide vector load, convert to a
20858 /// narrow load to eliminate the extraction:
20859 /// (extract_subvector (load wide vector)) --> (load narrow vector)
20861  // TODO: Add support for big-endian. The offset calculation must be adjusted.
20862  if (DAG.getDataLayout().isBigEndian())
20863  return SDValue();
20864 
20865  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20866  if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
20867  return SDValue();
20868 
20869  // Allow targets to opt-out.
20870  EVT VT = Extract->getValueType(0);
20871 
20872  // We can only create byte sized loads.
20873  if (!VT.isByteSized())
20874  return SDValue();
20875 
20876  unsigned Index = Extract->getConstantOperandVal(1);
20877  unsigned NumElts = VT.getVectorMinNumElements();
20878 
20879  // The definition of EXTRACT_SUBVECTOR states that the index must be a
20880  // multiple of the minimum number of elements in the result type.
20881  assert(Index % NumElts == 0 && "The extract subvector index is not a "
20882  "multiple of the result's element count");
20883 
20884  // It's fine to use TypeSize here as we know the offset will not be negative.
20885  TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20886 
20887  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20888  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20889  return SDValue();
20890 
20891  // The narrow load will be offset from the base address of the old load if
20892  // we are extracting from something besides index 0 (little-endian).
20893  SDLoc DL(Extract);
20894 
20895  // TODO: Use "BaseIndexOffset" to make this more effective.
20896  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20897 
20899  MachineFunction &MF = DAG.getMachineFunction();
20900  MachineMemOperand *MMO;
20901  if (Offset.isScalable()) {
20902  MachinePointerInfo MPI =
20904  MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20905  } else
20906  MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20907  StoreSize);
20908 
20909  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20910  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20911  return NewLd;
20912 }
20913 
20914 /// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
20915 /// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
20916 /// EXTRACT_SUBVECTOR(Op?, ?),
20917 /// Mask'))
20918 /// iff it is legal and profitable to do so. Notably, the trimmed mask
20919 /// (containing only the elements that are extracted)
20920 /// must reference at most two subvectors.
20922  SelectionDAG &DAG,
20923  const TargetLowering &TLI,
20924  bool LegalOperations) {
20925  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20926  "Must only be called on EXTRACT_SUBVECTOR's");
20927 
20928  SDValue N0 = N->getOperand(0);
20929 
20930  // Only deal with non-scalable vectors.
20931  EVT NarrowVT = N->getValueType(0);
20932  EVT WideVT = N0.getValueType();
20933  if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
20934  return SDValue();
20935 
20936  // The operand must be a shufflevector.
20937  auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
20938  if (!WideShuffleVector)
20939  return SDValue();
20940 
20941  // The old shuffleneeds to go away.
20942  if (!WideShuffleVector->hasOneUse())
20943  return SDValue();
20944 
20945  // And the narrow shufflevector that we'll form must be legal.
20946  if (LegalOperations &&
20948  return SDValue();
20949 
20950  uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
20951  int NumEltsExtracted = NarrowVT.getVectorNumElements();
20952  assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
20953  "Extract index is not a multiple of the output vector length.");
20954 
20955  int WideNumElts = WideVT.getVectorNumElements();
20956 
20957  SmallVector<int, 16> NewMask;
20958  NewMask.reserve(NumEltsExtracted);
20959  SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
20960  DemandedSubvectors;
20961 
20962  // Try to decode the wide mask into narrow mask from at most two subvectors.
20963  for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
20964  NumEltsExtracted)) {
20965  assert((M >= -1) && (M < (2 * WideNumElts)) &&
20966  "Out-of-bounds shuffle mask?");
20967 
20968  if (M < 0) {
20969  // Does not depend on operands, does not require adjustment.
20970  NewMask.emplace_back(M);
20971  continue;
20972  }
20973 
20974  // From which operand of the shuffle does this shuffle mask element pick?
20975  int WideShufOpIdx = M / WideNumElts;
20976  // Which element of that operand is picked?
20977  int OpEltIdx = M % WideNumElts;
20978 
20979  assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
20980  "Shuffle mask vector decomposition failure.");
20981 
20982  // And which NumEltsExtracted-sized subvector of that operand is that?
20983  int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
20984  // And which element within that subvector of that operand is that?
20985  int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
20986 
20987  assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
20988  "Shuffle mask subvector decomposition failure.");
20989 
20990  assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
20991  WideShufOpIdx * WideNumElts) == M &&
20992  "Shuffle mask full decomposition failure.");
20993 
20994  SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
20995 
20996  if (Op.isUndef()) {
20997  // Picking from an undef operand. Let's adjust mask instead.
20998  NewMask.emplace_back(-1);
20999  continue;
21000  }
21001 
21002  // Profitability check: only deal with extractions from the first subvector.
21003  if (OpSubvecIdx != 0)
21004  return SDValue();
21005 
21006  const std::pair<SDValue, int> DemandedSubvector =
21007  std::make_pair(Op, OpSubvecIdx);
21008 
21009  if (DemandedSubvectors.insert(DemandedSubvector)) {
21010  if (DemandedSubvectors.size() > 2)
21011  return SDValue(); // We can't handle more than two subvectors.
21012  // How many elements into the WideVT does this subvector start?
21013  int Index = NumEltsExtracted * OpSubvecIdx;
21014  // Bail out if the extraction isn't going to be cheap.
21015  if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
21016  return SDValue();
21017  }
21018 
21019  // Ok, but from which operand of the new shuffle will this element pick?
21020  int NewOpIdx =
21021  getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
21022  assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
21023 
21024  int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
21025  NewMask.emplace_back(AdjM);
21026  }
21027  assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
21028  assert(DemandedSubvectors.size() <= 2 &&
21029  "Should have ended up demanding at most two subvectors.");
21030 
21031  // Did we discover that the shuffle does not actually depend on operands?
21032  if (DemandedSubvectors.empty())
21033  return DAG.getUNDEF(NarrowVT);
21034 
21035  // We still perform the exact same EXTRACT_SUBVECTOR, just on different
21036  // operand[s]/index[es], so there is no point in checking for it's legality.
21037 
21038  // Do not turn a legal shuffle into an illegal one.
21039  if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
21040  !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
21041  return SDValue();
21042 
21043  SDLoc DL(N);
21044 
21045  SmallVector<SDValue, 2> NewOps;
21046  for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
21047  &DemandedSubvector : DemandedSubvectors) {
21048  // How many elements into the WideVT does this subvector start?
21049  int Index = NumEltsExtracted * DemandedSubvector.second;
21050  SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
21051  NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
21052  DemandedSubvector.first, IndexC));
21053  }
21054  assert((NewOps.size() == 1 || NewOps.size() == 2) &&
21055  "Should end up with either one or two ops");
21056 
21057  // If we ended up with only one operand, pad with an undef.
21058  if (NewOps.size() == 1)
21059  NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
21060 
21061  return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
21062 }
21063 
21064 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
21065  EVT NVT = N->getValueType(0);
21066  SDValue V = N->getOperand(0);
21067  uint64_t ExtIdx = N->getConstantOperandVal(1);
21068 
21069  // Extract from UNDEF is UNDEF.
21070  if (V.isUndef())
21071  return DAG.getUNDEF(NVT);
21072 
21074  if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
21075  return NarrowLoad;
21076 
21077  // Combine an extract of an extract into a single extract_subvector.
21078  // ext (ext X, C), 0 --> ext X, C
21079  if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
21080  if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
21081  V.getConstantOperandVal(1)) &&
21083  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
21084  V.getOperand(1));
21085  }
21086  }
21087 
21088  // Try to move vector bitcast after extract_subv by scaling extraction index:
21089  // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
21090  if (V.getOpcode() == ISD::BITCAST &&
21091  V.getOperand(0).getValueType().isVector() &&
21092  (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
21093  SDValue SrcOp = V.getOperand(0);
21094  EVT SrcVT = SrcOp.getValueType();
21095  unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
21096  unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
21097  if ((SrcNumElts % DestNumElts) == 0) {
21098  unsigned SrcDestRatio = SrcNumElts / DestNumElts;
21099  ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
21100  EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
21101  NewExtEC);
21102  if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21103  SDLoc DL(N);
21104  SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
21105  SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21106  V.getOperand(0), NewIndex);
21107  return DAG.getBitcast(NVT, NewExtract);
21108  }
21109  }
21110  if ((DestNumElts % SrcNumElts) == 0) {
21111  unsigned DestSrcRatio = DestNumElts / SrcNumElts;
21112  if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
21113  ElementCount NewExtEC =
21114  NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
21115  EVT ScalarVT = SrcVT.getScalarType();
21116  if ((ExtIdx % DestSrcRatio) == 0) {
21117  SDLoc DL(N);
21118  unsigned IndexValScaled = ExtIdx / DestSrcRatio;
21119  EVT NewExtVT =
21120  EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
21121  if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21122  SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21123  SDValue NewExtract =
21124  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21125  V.getOperand(0), NewIndex);
21126  return DAG.getBitcast(NVT, NewExtract);
21127  }
21128  if (NewExtEC.isScalar() &&
21130  SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21131  SDValue NewExtract =
21132  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
21133  V.getOperand(0), NewIndex);
21134  return DAG.getBitcast(NVT, NewExtract);
21135  }
21136  }
21137  }
21138  }
21139  }
21140 
21141  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
21142  unsigned ExtNumElts = NVT.getVectorMinNumElements();
21143  EVT ConcatSrcVT = V.getOperand(0).getValueType();
21144  assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
21145  "Concat and extract subvector do not change element type");
21146  assert((ExtIdx % ExtNumElts) == 0 &&
21147  "Extract index is not a multiple of the input vector length.");
21148 
21149  unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
21150  unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
21151 
21152  // If the concatenated source types match this extract, it's a direct
21153  // simplification:
21154  // extract_subvec (concat V1, V2, ...), i --> Vi
21155  if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
21156  return V.getOperand(ConcatOpIdx);
21157 
21158  // If the concatenated source vectors are a multiple length of this extract,
21159  // then extract a fraction of one of those source vectors directly from a
21160  // concat operand. Example:
21161  // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
21162  // v2i8 extract_subvec v8i8 Y, 6
21163  if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
21164  ConcatSrcNumElts % ExtNumElts == 0) {
21165  SDLoc DL(N);
21166  unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
21167  assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
21168  "Trying to extract from >1 concat operand?");
21169  assert(NewExtIdx % ExtNumElts == 0 &&
21170  "Extract index is not a multiple of the input vector length.");
21171  SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
21172  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
21173  V.getOperand(ConcatOpIdx), NewIndexC);
21174  }
21175  }
21176 
21177  if (SDValue V =
21178  foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
21179  return V;
21180 
21181  V = peekThroughBitcasts(V);
21182 
21183  // If the input is a build vector. Try to make a smaller build vector.
21184  if (V.getOpcode() == ISD::BUILD_VECTOR) {
21185  EVT InVT = V.getValueType();
21186  unsigned ExtractSize = NVT.getSizeInBits();
21187  unsigned EltSize = InVT.getScalarSizeInBits();
21188  // Only do this if we won't split any elements.
21189  if (ExtractSize % EltSize == 0) {
21190  unsigned NumElems = ExtractSize / EltSize;
21191  EVT EltVT = InVT.getVectorElementType();
21192  EVT ExtractVT =
21193  NumElems == 1 ? EltVT
21194  : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
21195  if ((Level < AfterLegalizeDAG ||
21196  (NumElems == 1 ||
21197  TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
21198  (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
21199  unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
21200 
21201  if (NumElems == 1) {
21202  SDValue Src = V->getOperand(IdxVal);
21203  if (EltVT != Src.getValueType())
21204  Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
21205  return DAG.getBitcast(NVT, Src);
21206  }
21207 
21208  // Extract the pieces from the original build_vector.
21209  SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
21210  V->ops().slice(IdxVal, NumElems));
21211  return DAG.getBitcast(NVT, BuildVec);
21212  }
21213  }
21214  }
21215 
21216  if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
21217  // Handle only simple case where vector being inserted and vector
21218  // being extracted are of same size.
21219  EVT SmallVT = V.getOperand(1).getValueType();
21220  if (!NVT.bitsEq(SmallVT))
21221  return SDValue();
21222 
21223  // Combine:
21224  // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
21225  // Into:
21226  // indices are equal or bit offsets are equal => V1
21227  // otherwise => (extract_subvec V1, ExtIdx)
21228  uint64_t InsIdx = V.getConstantOperandVal(2);
21229  if (InsIdx * SmallVT.getScalarSizeInBits() ==
21230  ExtIdx * NVT.getScalarSizeInBits()) {
21231  if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
21232  return SDValue();
21233 
21234  return DAG.getBitcast(NVT, V.getOperand(1));
21235  }
21236  return DAG.getNode(
21238  DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
21239  N->getOperand(1));
21240  }
21241 
21242  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
21243  return NarrowBOp;
21244 
21246  return SDValue(N, 0);
21247 
21248  return SDValue();
21249 }
21250 
21251 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
21252 /// followed by concatenation. Narrow vector ops may have better performance
21253 /// than wide ops, and this can unlock further narrowing of other vector ops.
21254 /// Targets can invert this transform later if it is not profitable.
21256  SelectionDAG &DAG) {
21257  SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
21258  if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
21259  N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
21260  !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
21261  return SDValue();
21262 
21263  // Split the wide shuffle mask into halves. Any mask element that is accessing
21264  // operand 1 is offset down to account for narrowing of the vectors.
21265  ArrayRef<int> Mask = Shuf->getMask();
21266  EVT VT = Shuf->getValueType(0);
21267  unsigned NumElts = VT.getVectorNumElements();
21268  unsigned HalfNumElts = NumElts / 2;
21269  SmallVector<int, 16> Mask0(HalfNumElts, -1);
21270  SmallVector<int, 16> Mask1(HalfNumElts, -1);
21271  for (unsigned i = 0; i != NumElts; ++i) {
21272  if (Mask[i] == -1)
21273  continue;
21274  // If we reference the upper (undef) subvector then the element is undef.
21275  if ((Mask[i] % NumElts) >= HalfNumElts)
21276  continue;
21277  int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
21278  if (i < HalfNumElts)
21279  Mask0[i] = M;
21280  else
21281  Mask1[i - HalfNumElts] = M;
21282  }
21283 
21284  // Ask the target if this is a valid transform.
21285  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21286  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
21287  HalfNumElts);
21288  if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
21289  !TLI.isShuffleMaskLegal(Mask1, HalfVT))
21290  return SDValue();
21291 
21292  // shuffle (concat X, undef), (concat Y, undef), Mask -->
21293  // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
21294  SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
21295  SDLoc DL(Shuf);
21296  SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
21297  SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
21298  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
21299 }
21300 
21301 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
21302 // or turn a shuffle of a single concat into simpler shuffle then concat.
21304  EVT VT = N->getValueType(0);
21305  unsigned NumElts = VT.getVectorNumElements();
21306 
21307  SDValue N0 = N->getOperand(0);
21308  SDValue N1 = N->getOperand(1);
21309  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21310  ArrayRef<int> Mask = SVN->getMask();
21311 
21313  EVT ConcatVT = N0.getOperand(0).getValueType();
21314  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
21315  unsigned NumConcats = NumElts / NumElemsPerConcat;
21316 
21317  auto IsUndefMaskElt = [](int i) { return i == -1; };
21318 
21319  // Special case: shuffle(concat(A,B)) can be more efficiently represented
21320  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
21321  // half vector elements.
21322  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
21323  llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
21324  IsUndefMaskElt)) {
21325  N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
21326  N0.getOperand(1),
21327  Mask.slice(0, NumElemsPerConcat));
21328  N1 = DAG.getUNDEF(ConcatVT);
21329  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
21330  }
21331 
21332  // Look at every vector that's inserted. We're looking for exact
21333  // subvector-sized copies from a concatenated vector
21334  for (unsigned I = 0; I != NumConcats; ++I) {
21335  unsigned Begin = I * NumElemsPerConcat;
21336  ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
21337 
21338  // Make sure we're dealing with a copy.
21339  if (llvm::all_of(SubMask, IsUndefMaskElt)) {
21340  Ops.push_back(DAG.getUNDEF(ConcatVT));
21341  continue;
21342  }
21343 
21344  int OpIdx = -1;
21345  for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
21346  if (IsUndefMaskElt(SubMask[i]))
21347  continue;
21348  if ((SubMask[i] % (int)NumElemsPerConcat) != i)
21349  return SDValue();
21350  int EltOpIdx = SubMask[i] / NumElemsPerConcat;
21351  if (0 <= OpIdx && EltOpIdx != OpIdx)
21352  return SDValue();
21353  OpIdx = EltOpIdx;
21354  }
21355  assert(0 <= OpIdx && "Unknown concat_vectors op");
21356 
21357  if (OpIdx < (int)N0.getNumOperands())
21358  Ops.push_back(N0.getOperand(OpIdx));
21359  else
21360  Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
21361  }
21362 
21363  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21364 }
21365 
21366 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21367 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21368 //
21369 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
21370 // a simplification in some sense, but it isn't appropriate in general: some
21371 // BUILD_VECTORs are substantially cheaper than others. The general case
21372 // of a BUILD_VECTOR requires inserting each element individually (or
21373 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
21374 // all constants is a single constant pool load. A BUILD_VECTOR where each
21375 // element is identical is a splat. A BUILD_VECTOR where most of the operands
21376 // are undef lowers to a small number of element insertions.
21377 //
21378 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
21379 // We don't fold shuffles where one side is a non-zero constant, and we don't
21380 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
21381 // non-constant operands. This seems to work out reasonably well in practice.
21383  SelectionDAG &DAG,
21384  const TargetLowering &TLI) {
21385  EVT VT = SVN->getValueType(0);
21386  unsigned NumElts = VT.getVectorNumElements();
21387  SDValue N0 = SVN->getOperand(0);
21388  SDValue N1 = SVN->getOperand(1);
21389 
21390  if (!N0->hasOneUse())
21391  return SDValue();
21392 
21393  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
21394  // discussed above.
21395  if (!N1.isUndef()) {
21396  if (!N1->hasOneUse())
21397  return SDValue();
21398 
21399  bool N0AnyConst = isAnyConstantBuildVector(N0);
21400  bool N1AnyConst = isAnyConstantBuildVector(N1);
21401  if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
21402  return SDValue();
21403  if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
21404  return SDValue();
21405  }
21406 
21407  // If both inputs are splats of the same value then we can safely merge this
21408  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
21409  bool IsSplat = false;
21410  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
21411  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
21412  if (BV0 && BV1)
21413  if (SDValue Splat0 = BV0->getSplatValue())
21414  IsSplat = (Splat0 == BV1->getSplatValue());
21415 
21417  SmallSet<SDValue, 16> DuplicateOps;
21418  for (int M : SVN->getMask()) {
21419  SDValue Op = DAG.getUNDEF(VT.getScalarType());
21420  if (M >= 0) {
21421  int Idx = M < (int)NumElts ? M : M - NumElts;
21422  SDValue &S = (M < (int)NumElts ? N0 : N1);
21423  if (S.getOpcode() == ISD::BUILD_VECTOR) {
21424  Op = S.getOperand(Idx);
21425  } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
21426  SDValue Op0 = S.getOperand(0);
21427  Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
21428  } else {
21429  // Operand can't be combined - bail out.
21430  return SDValue();
21431  }
21432  }
21433 
21434  // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
21435  // generating a splat; semantically, this is fine, but it's likely to
21436  // generate low-quality code if the target can't reconstruct an appropriate
21437  // shuffle.
21438  if (!Op.isUndef() && !isIntOrFPConstant(Op))
21439  if (!IsSplat && !DuplicateOps.insert(Op).second)
21440  return SDValue();
21441 
21442  Ops.push_back(Op);
21443  }
21444 
21445  // BUILD_VECTOR requires all inputs to be of the same type, find the
21446  // maximum type and extend them all.
21447  EVT SVT = VT.getScalarType();
21448  if (SVT.isInteger())
21449  for (SDValue &Op : Ops)
21450  SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
21451  if (SVT != VT.getScalarType())
21452  for (SDValue &Op : Ops)
21453  Op = TLI.isZExtFree(Op.getValueType(), SVT)
21454  ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
21455  : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
21456  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
21457 }
21458 
21459 // Match shuffles that can be converted to any_vector_extend_in_reg.
21460 // This is often generated during legalization.
21461 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
21462 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
21464  SelectionDAG &DAG,
21465  const TargetLowering &TLI,
21466  bool LegalOperations) {
21467  EVT VT = SVN->getValueType(0);
21468  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21469 
21470  // TODO Add support for big-endian when we have a test case.
21471  if (!VT.isInteger() || IsBigEndian)
21472  return SDValue();
21473 
21474  unsigned NumElts = VT.getVectorNumElements();
21475  unsigned EltSizeInBits = VT.getScalarSizeInBits();
21476  ArrayRef<int> Mask = SVN->getMask();
21477  SDValue N0 = SVN->getOperand(0);
21478 
21479  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
21480  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
21481  for (unsigned i = 0; i != NumElts; ++i) {
21482  if (Mask[i] < 0)
21483  continue;
21484  if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
21485  continue;
21486  return false;
21487  }
21488  return true;
21489  };
21490 
21491  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
21492  // power-of-2 extensions as they are the most likely.
21493  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
21494  // Check for non power of 2 vector sizes
21495  if (NumElts % Scale != 0)
21496  continue;
21497  if (!isAnyExtend(Scale))
21498  continue;
21499 
21500  EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
21501  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
21502  // Never create an illegal type. Only create unsupported operations if we
21503  // are pre-legalization.
21504  if (TLI.isTypeLegal(OutVT))
21505  if (!LegalOperations ||
21507  return DAG.getBitcast(VT,
21509  SDLoc(SVN), OutVT, N0));
21510  }
21511 
21512  return SDValue();
21513 }
21514 
21515 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
21516 // each source element of a large type into the lowest elements of a smaller
21517 // destination type. This is often generated during legalization.
21518 // If the source node itself was a '*_extend_vector_inreg' node then we should
21519 // then be able to remove it.
21521  SelectionDAG &DAG) {
21522  EVT VT = SVN->getValueType(0);
21523  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21524 
21525  // TODO Add support for big-endian when we have a test case.
21526  if (!VT.isInteger() || IsBigEndian)
21527  return SDValue();
21528 
21529  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
21530 
21531  unsigned Opcode = N0.getOpcode();
21532  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
21533  Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
21535  return SDValue();
21536 
21537  SDValue N00 = N0.getOperand(0);
21538  ArrayRef<int> Mask = SVN->getMask();
21539  unsigned NumElts = VT.getVectorNumElements();
21540  unsigned EltSizeInBits = VT.getScalarSizeInBits();
21541  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
21542  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
21543 
21544  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
21545  return SDValue();
21546  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
21547 
21548  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
21549  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
21550  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
21551  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
21552  for (unsigned i = 0; i != NumElts; ++i) {
21553  if (Mask[i] < 0)
21554  continue;
21555  if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
21556  continue;
21557  return false;
21558  }
21559  return true;
21560  };
21561 
21562  // At the moment we just handle the case where we've truncated back to the
21563  // same size as before the extension.
21564  // TODO: handle more extension/truncation cases as cases arise.
21565  if (EltSizeInBits != ExtSrcSizeInBits)
21566  return SDValue();
21567 
21568  // We can remove *extend_vector_inreg only if the truncation happens at
21569  // the same scale as the extension.
21570  if (isTruncate(ExtScale))
21571  return DAG.getBitcast(VT, N00);
21572 
21573  return SDValue();
21574 }
21575 
21576 // Combine shuffles of splat-shuffles of the form:
21577 // shuffle (shuffle V, undef, splat-mask), undef, M
21578 // If splat-mask contains undef elements, we need to be careful about
21579 // introducing undef's in the folded mask which are not the result of composing
21580 // the masks of the shuffles.
21582  SelectionDAG &DAG) {
21583  if (!Shuf->getOperand(1).isUndef())
21584  return SDValue();
21585  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21586  if (!Splat || !Splat->isSplat())
21587  return SDValue();
21588 
21589  ArrayRef<int> ShufMask = Shuf->getMask();
21590  ArrayRef<int> SplatMask = Splat->getMask();
21591  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21592 
21593  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21594  // every undef mask element in the splat-shuffle has a corresponding undef
21595  // element in the user-shuffle's mask or if the composition of mask elements
21596  // would result in undef.
21597  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21598  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21599  // In this case it is not legal to simplify to the splat-shuffle because we
21600  // may be exposing the users of the shuffle an undef element at index 1
21601  // which was not there before the combine.
21602  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21603  // In this case the composition of masks yields SplatMask, so it's ok to
21604  // simplify to the splat-shuffle.
21605  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21606  // In this case the composed mask includes all undef elements of SplatMask
21607  // and in addition sets element zero to undef. It is safe to simplify to
21608  // the splat-shuffle.
21609  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21610  ArrayRef<int> SplatMask) {
21611  for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21612  if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21613  SplatMask[UserMask[i]] != -1)
21614  return false;
21615  return true;
21616  };
21617  if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21618  return Shuf->getOperand(0);
21619 
21620  // Create a new shuffle with a mask that is composed of the two shuffles'
21621  // masks.
21622  SmallVector<int, 32> NewMask;
21623  for (int Idx : ShufMask)
21624  NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21625 
21626  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21627  Splat->getOperand(0), Splat->getOperand(1),
21628  NewMask);
21629 }
21630 
21631 /// Combine shuffle of shuffle of the form:
21632 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
21634  SelectionDAG &DAG) {
21635  if (!OuterShuf->getOperand(1).isUndef())
21636  return SDValue();
21637  auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21638  if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21639  return SDValue();
21640 
21641  ArrayRef<int> OuterMask = OuterShuf->getMask();
21642  ArrayRef<int> InnerMask = InnerShuf->getMask();
21643  unsigned NumElts = OuterMask.size();
21644  assert(NumElts == InnerMask.size() && "Mask length mismatch");
21645  SmallVector<int, 32> CombinedMask(NumElts, -1);
21646  int SplatIndex = -1;
21647  for (unsigned i = 0; i != NumElts; ++i) {
21648  // Undef lanes remain undef.
21649  int OuterMaskElt = OuterMask[i];
21650  if (OuterMaskElt == -1)
21651  continue;
21652 
21653  // Peek through the shuffle masks to get the underlying source element.
21654  int InnerMaskElt = InnerMask[OuterMaskElt];
21655  if (InnerMaskElt == -1)
21656  continue;
21657 
21658  // Initialize the splatted element.
21659  if (SplatIndex == -1)
21660  SplatIndex = InnerMaskElt;
21661 
21662  // Non-matching index - this is not a splat.
21663  if (SplatIndex != InnerMaskElt)
21664  return SDValue();
21665 
21666  CombinedMask[i] = InnerMaskElt;
21667  }
21668  assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21669  getSplatIndex(CombinedMask) != -1) &&
21670  "Expected a splat mask");
21671 
21672  // TODO: The transform may be a win even if the mask is not legal.
21673  EVT VT = OuterShuf->getValueType(0);
21674  assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21675  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21676  return SDValue();
21677 
21678  return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21679  InnerShuf->getOperand(1), CombinedMask);
21680 }
21681 
21682 /// If the shuffle mask is taking exactly one element from the first vector
21683 /// operand and passing through all other elements from the second vector
21684 /// operand, return the index of the mask element that is choosing an element
21685 /// from the first operand. Otherwise, return -1.
21687  int MaskSize = Mask.size();
21688  int EltFromOp0 = -1;
21689  // TODO: This does not match if there are undef elements in the shuffle mask.
21690  // Should we ignore undefs in the shuffle mask instead? The trade-off is
21691  // removing an instruction (a shuffle), but losing the knowledge that some
21692  // vector lanes are not needed.
21693  for (int i = 0; i != MaskSize; ++i) {
21694  if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21695  // We're looking for a shuffle of exactly one element from operand 0.
21696  if (EltFromOp0 != -1)
21697  return -1;
21698  EltFromOp0 = i;
21699  } else if (Mask[i] != i + MaskSize) {
21700  // Nothing from operand 1 can change lanes.
21701  return -1;
21702  }
21703  }
21704  return EltFromOp0;
21705 }
21706 
21707 /// If a shuffle inserts exactly one element from a source vector operand into
21708 /// another vector operand and we can access the specified element as a scalar,
21709 /// then we can eliminate the shuffle.
21711  SelectionDAG &DAG) {
21712  // First, check if we are taking one element of a vector and shuffling that
21713  // element into another vector.
21714  ArrayRef<int> Mask = Shuf->getMask();
21715  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21716  SDValue Op0 = Shuf->getOperand(0);
21717  SDValue Op1 = Shuf->getOperand(1);
21719  if (ShufOp0Index == -1) {
21720  // Commute mask and check again.
21721  ShuffleVectorSDNode::commuteMask(CommutedMask);
21722  ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21723  if (ShufOp0Index == -1)
21724  return SDValue();
21725  // Commute operands to match the commuted shuffle mask.
21726  std::swap(Op0, Op1);
21727  Mask = CommutedMask;
21728  }
21729 
21730  // The shuffle inserts exactly one element from operand 0 into operand 1.
21731  // Now see if we can access that element as a scalar via a real insert element
21732  // instruction.
21733  // TODO: We can try harder to locate the element as a scalar. Examples: it
21734  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21735  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21736  "Shuffle mask value must be from operand 0");
21737  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21738  return SDValue();
21739 
21740  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21741  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21742  return SDValue();
21743 
21744  // There's an existing insertelement with constant insertion index, so we
21745  // don't need to check the legality/profitability of a replacement operation
21746  // that differs at most in the constant value. The target should be able to
21747  // lower any of those in a similar way. If not, legalization will expand this
21748  // to a scalar-to-vector plus shuffle.
21749  //
21750  // Note that the shuffle may move the scalar from the position that the insert
21751  // element used. Therefore, our new insert element occurs at the shuffle's
21752  // mask index value, not the insert's index value.
21753  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21754  SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21755  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21756  Op1, Op0.getOperand(1), NewInsIndex);
21757 }
21758 
21759 /// If we have a unary shuffle of a shuffle, see if it can be folded away
21760 /// completely. This has the potential to lose undef knowledge because the first
21761 /// shuffle may not have an undef mask element where the second one does. So
21762 /// only call this after doing simplifications based on demanded elements.
21764  // shuf (shuf0 X, Y, Mask0), undef, Mask
21765  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21766  if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21767  return SDValue();
21768 
21769  ArrayRef<int> Mask = Shuf->getMask();
21770  ArrayRef<int> Mask0 = Shuf0->getMask();
21771  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21772  // Ignore undef elements.
21773  if (Mask[i] == -1)
21774  continue;
21775  assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21776 
21777  // Is the element of the shuffle operand chosen by this shuffle the same as
21778  // the element chosen by the shuffle operand itself?
21779  if (Mask0[Mask[i]] != Mask0[i])
21780  return SDValue();
21781  }
21782  // Every element of this shuffle is identical to the result of the previous
21783  // shuffle, so we can replace this value.
21784  return Shuf->getOperand(0);
21785 }
21786 
21787 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21788  EVT VT = N->getValueType(0);
21789  unsigned NumElts = VT.getVectorNumElements();
21790 
21791  SDValue N0 = N->getOperand(0);
21792  SDValue N1 = N->getOperand(1);
21793 
21794  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21795 
21796  // Canonicalize shuffle undef, undef -> undef
21797  if (N0.isUndef() && N1.isUndef())
21798  return DAG.getUNDEF(VT);
21799 
21800  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21801 
21802  // Canonicalize shuffle v, v -> v, undef
21803  if (N0 == N1)
21804  return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
21805  createUnaryMask(SVN->getMask(), NumElts));
21806 
21807  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
21808  if (N0.isUndef())
21809  return DAG.getCommutedVectorShuffle(*SVN);
21810 
21811  // Remove references to rhs if it is undef
21812  if (N1.isUndef()) {
21813  bool Changed = false;
21814  SmallVector<int, 8> NewMask;
21815  for (unsigned i = 0; i != NumElts; ++i) {
21816  int Idx = SVN->getMaskElt(i);
21817  if (Idx >= (int)NumElts) {
21818  Idx = -1;
21819  Changed = true;
21820  }
21821  NewMask.push_back(Idx);
21822  }
21823  if (Changed)
21824  return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21825  }
21826 
21827  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21828  return InsElt;
21829 
21830  // A shuffle of a single vector that is a splatted value can always be folded.
21831  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21832  return V;
21833 
21834  if (SDValue V = formSplatFromShuffles(SVN, DAG))
21835  return V;
21836 
21837  // If it is a splat, check if the argument vector is another splat or a
21838  // build_vector.
21839  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21840  int SplatIndex = SVN->getSplatIndex();
21841  if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21842  TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21843  // splat (vector_bo L, R), Index -->
21844  // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21845  SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21846  SDLoc DL(N);
21847  EVT EltVT = VT.getScalarType();
21848  SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21849  SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21850  SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21851  SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21852  N0.getNode()->getFlags());
21853  SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21854  SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21855  return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21856  }
21857 
21858  // If this is a bit convert that changes the element type of the vector but
21859  // not the number of vector elements, look through it. Be careful not to
21860  // look though conversions that change things like v4f32 to v2f64.
21861  SDNode *V = N0.getNode();
21862  if (V->getOpcode() == ISD::BITCAST) {
21863  SDValue ConvInput = V->getOperand(0);
21864  if (ConvInput.getValueType().isVector() &&
21865  ConvInput.getValueType().getVectorNumElements() == NumElts)
21866  V = ConvInput.getNode();
21867  }
21868 
21869  if (V->getOpcode() == ISD::BUILD_VECTOR) {
21870  assert(V->getNumOperands() == NumElts &&
21871  "BUILD_VECTOR has wrong number of operands");
21872  SDValue Base;
21873  bool AllSame = true;
21874  for (unsigned i = 0; i != NumElts; ++i) {
21875  if (!V->getOperand(i).isUndef()) {
21876  Base = V->getOperand(i);
21877  break;
21878  }
21879  }
21880  // Splat of <u, u, u, u>, return <u, u, u, u>
21881  if (!Base.getNode())
21882  return N0;
21883  for (unsigned i = 0; i != NumElts; ++i) {
21884  if (V->getOperand(i) != Base) {
21885  AllSame = false;
21886  break;
21887  }
21888  }
21889  // Splat of <x, x, x, x>, return <x, x, x, x>
21890  if (AllSame)
21891  return N0;
21892 
21893  // Canonicalize any other splat as a build_vector.
21894  SDValue Splatted = V->getOperand(SplatIndex);
21895  SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21896  SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21897 
21898  // We may have jumped through bitcasts, so the type of the
21899  // BUILD_VECTOR may not match the type of the shuffle.
21900  if (V->getValueType(0) != VT)
21901  NewBV = DAG.getBitcast(VT, NewBV);
21902  return NewBV;
21903  }
21904  }
21905 
21906  // Simplify source operands based on shuffle mask.
21908  return SDValue(N, 0);
21909 
21910  // This is intentionally placed after demanded elements simplification because
21911  // it could eliminate knowledge of undef elements created by this shuffle.
21912  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21913  return ShufOp;
21914 
21915  // Match shuffles that can be converted to any_vector_extend_in_reg.
21916  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21917  return V;
21918 
21919  // Combine "truncate_vector_in_reg" style shuffles.
21920  if (SDValue V = combineTruncationShuffle(SVN, DAG))
21921  return V;
21922 
21923  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21925  (N1.isUndef() ||
21926  (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21927  N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21928  if (SDValue V = partitionShuffleOfConcats(N, DAG))
21929  return V;
21930  }
21931 
21932  // A shuffle of a concat of the same narrow vector can be reduced to use
21933  // only low-half elements of a concat with undef:
21934  // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
21935  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21936  N0.getNumOperands() == 2 &&
21937  N0.getOperand(0) == N0.getOperand(1)) {
21938  int HalfNumElts = (int)NumElts / 2;
21939  SmallVector<int, 8> NewMask;
21940  for (unsigned i = 0; i != NumElts; ++i) {
21941  int Idx = SVN->getMaskElt(i);
21942  if (Idx >= HalfNumElts) {
21943  assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21944  Idx -= HalfNumElts;
21945  }
21946  NewMask.push_back(Idx);
21947  }
21948  if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21949  SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21950  SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21951  N0.getOperand(0), UndefVec);
21952  return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21953  }
21954  }
21955 
21956  // See if we can replace a shuffle with an insert_subvector.
21957  // e.g. v2i32 into v8i32:
21958  // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21959  // --> insert_subvector(lhs,rhs1,4).
21960  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21962  auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
21963  // Ensure RHS subvectors are legal.
21964  assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21965  EVT SubVT = RHS.getOperand(0).getValueType();
21966  int NumSubVecs = RHS.getNumOperands();
21967  int NumSubElts = SubVT.getVectorNumElements();
21968  assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21969  if (!TLI.isTypeLegal(SubVT))
21970  return SDValue();
21971 
21972  // Don't bother if we have an unary shuffle (matches undef + LHS elts).
21973  if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21974  return SDValue();
21975 
21976  // Search [NumSubElts] spans for RHS sequence.
21977  // TODO: Can we avoid nested loops to increase performance?
21978  SmallVector<int> InsertionMask(NumElts);
21979  for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21980  for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21981  // Reset mask to identity.
21982  std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21983 
21984  // Add subvector insertion.
21985  std::iota(InsertionMask.begin() + SubIdx,
21986  InsertionMask.begin() + SubIdx + NumSubElts,
21987  NumElts + (SubVec * NumSubElts));
21988 
21989  // See if the shuffle mask matches the reference insertion mask.
21990  bool MatchingShuffle = true;
21991  for (int i = 0; i != (int)NumElts; ++i) {
21992  int ExpectIdx = InsertionMask[i];
21993  int ActualIdx = Mask[i];
21994  if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21995  MatchingShuffle = false;
21996  break;
21997  }
21998  }
21999 
22000  if (MatchingShuffle)
22001  return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
22002  RHS.getOperand(SubVec),
22003  DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
22004  }
22005  }
22006  return SDValue();
22007  };
22008  ArrayRef<int> Mask = SVN->getMask();
22009  if (N1.getOpcode() == ISD::CONCAT_VECTORS)
22010  if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
22011  return InsertN1;
22012  if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
22013  SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
22014  ShuffleVectorSDNode::commuteMask(CommuteMask);
22015  if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
22016  return InsertN0;
22017  }
22018  }
22019 
22020  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
22021  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
22022  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
22023  if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
22024  return Res;
22025 
22026  // If this shuffle only has a single input that is a bitcasted shuffle,
22027  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
22028  // back to their original types.
22029  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
22030  N1.isUndef() && Level < AfterLegalizeVectorOps &&
22031  TLI.isTypeLegal(VT)) {
22032 
22034  if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
22035  EVT SVT = VT.getScalarType();
22036  EVT InnerVT = BC0->getValueType(0);
22037  EVT InnerSVT = InnerVT.getScalarType();
22038 
22039  // Determine which shuffle works with the smaller scalar type.
22040  EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
22041  EVT ScaleSVT = ScaleVT.getScalarType();
22042 
22043  if (TLI.isTypeLegal(ScaleVT) &&
22044  0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
22045  0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
22046  int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22047  int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22048 
22049  // Scale the shuffle masks to the smaller scalar type.
22050  ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
22051  SmallVector<int, 8> InnerMask;
22052  SmallVector<int, 8> OuterMask;
22053  narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
22054  narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
22055 
22056  // Merge the shuffle masks.
22057  SmallVector<int, 8> NewMask;
22058  for (int M : OuterMask)
22059  NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
22060 
22061  // Test for shuffle mask legality over both commutations.
22062  SDValue SV0 = BC0->getOperand(0);
22063  SDValue SV1 = BC0->getOperand(1);
22064  bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22065  if (!LegalMask) {
22066  std::swap(SV0, SV1);
22068  LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22069  }
22070 
22071  if (LegalMask) {
22072  SV0 = DAG.getBitcast(ScaleVT, SV0);
22073  SV1 = DAG.getBitcast(ScaleVT, SV1);
22074  return DAG.getBitcast(
22075  VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
22076  }
22077  }
22078  }
22079  }
22080 
22081  // Compute the combined shuffle mask for a shuffle with SV0 as the first
22082  // operand, and SV1 as the second operand.
22083  // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
22084  // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
22085  auto MergeInnerShuffle =
22086  [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
22087  ShuffleVectorSDNode *OtherSVN, SDValue N1,
22088  const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
22089  SmallVectorImpl<int> &Mask) -> bool {
22090  // Don't try to fold splats; they're likely to simplify somehow, or they
22091  // might be free.
22092  if (OtherSVN->isSplat())
22093  return false;
22094 
22095  SV0 = SV1 = SDValue();
22096  Mask.clear();
22097 
22098  for (unsigned i = 0; i != NumElts; ++i) {
22099  int Idx = SVN->getMaskElt(i);
22100  if (Idx < 0) {
22101  // Propagate Undef.
22102  Mask.push_back(Idx);
22103  continue;
22104  }
22105 
22106  if (Commute)
22107  Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
22108 
22109  SDValue CurrentVec;
22110  if (Idx < (int)NumElts) {
22111  // This shuffle index refers to the inner shuffle N0. Lookup the inner
22112  // shuffle mask to identify which vector is actually referenced.
22113  Idx = OtherSVN->getMaskElt(Idx);
22114  if (Idx < 0) {
22115  // Propagate Undef.
22116  Mask.push_back(Idx);
22117  continue;
22118  }
22119  CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
22120  : OtherSVN->getOperand(1);
22121  } else {
22122  // This shuffle index references an element within N1.
22123  CurrentVec = N1;
22124  }
22125 
22126  // Simple case where 'CurrentVec' is UNDEF.
22127  if (CurrentVec.isUndef()) {
22128  Mask.push_back(-1);
22129  continue;
22130  }
22131 
22132  // Canonicalize the shuffle index. We don't know yet if CurrentVec
22133  // will be the first or second operand of the combined shuffle.
22134  Idx = Idx % NumElts;
22135  if (!SV0.getNode() || SV0 == CurrentVec) {
22136  // Ok. CurrentVec is the left hand side.
22137  // Update the mask accordingly.
22138  SV0 = CurrentVec;
22139  Mask.push_back(Idx);
22140  continue;
22141  }
22142  if (!SV1.getNode() || SV1 == CurrentVec) {
22143  // Ok. CurrentVec is the right hand side.
22144  // Update the mask accordingly.
22145  SV1 = CurrentVec;
22146  Mask.push_back(Idx + NumElts);
22147  continue;
22148  }
22149 
22150  // Last chance - see if the vector is another shuffle and if it
22151  // uses one of the existing candidate shuffle ops.
22152  if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
22153  int InnerIdx = CurrentSVN->getMaskElt(Idx);
22154  if (InnerIdx < 0) {
22155  Mask.push_back(-1);
22156  continue;
22157  }
22158  SDValue InnerVec = (InnerIdx < (int)NumElts)
22159  ? CurrentSVN->getOperand(0)
22160  : CurrentSVN->getOperand(1);
22161  if (InnerVec.isUndef()) {
22162  Mask.push_back(-1);
22163  continue;
22164  }
22165  InnerIdx %= NumElts;
22166  if (InnerVec == SV0) {
22167  Mask.push_back(InnerIdx);
22168  continue;
22169  }
22170  if (InnerVec == SV1) {
22171  Mask.push_back(InnerIdx + NumElts);
22172  continue;
22173  }
22174  }
22175 
22176  // Bail out if we cannot convert the shuffle pair into a single shuffle.
22177  return false;
22178  }
22179 
22180  if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22181  return true;
22182 
22183  // Avoid introducing shuffles with illegal mask.
22184  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22185  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22186  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22187  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
22188  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
22189  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
22190  if (TLI.isShuffleMaskLegal(Mask, VT))
22191  return true;
22192 
22193  std::swap(SV0, SV1);
22195  return TLI.isShuffleMaskLegal(Mask, VT);
22196  };
22197 
22198  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22199  // Canonicalize shuffles according to rules:
22200  // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
22201  // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
22202  // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
22203  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22204  N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
22205  // The incoming shuffle must be of the same type as the result of the
22206  // current shuffle.
22207  assert(N1->getOperand(0).getValueType() == VT &&
22208  "Shuffle types don't match");
22209 
22210  SDValue SV0 = N1->getOperand(0);
22211  SDValue SV1 = N1->getOperand(1);
22212  bool HasSameOp0 = N0 == SV0;
22213  bool IsSV1Undef = SV1.isUndef();
22214  if (HasSameOp0 || IsSV1Undef || N0 == SV1)
22215  // Commute the operands of this shuffle so merging below will trigger.
22216  return DAG.getCommutedVectorShuffle(*SVN);
22217  }
22218 
22219  // Canonicalize splat shuffles to the RHS to improve merging below.
22220  // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
22221  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
22222  N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22223  cast<ShuffleVectorSDNode>(N0)->isSplat() &&
22224  !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
22225  return DAG.getCommutedVectorShuffle(*SVN);
22226  }
22227 
22228  // Try to fold according to rules:
22229  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22230  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22231  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22232  // Don't try to fold shuffles with illegal type.
22233  // Only fold if this shuffle is the only user of the other shuffle.
22234  // Try matching shuffle(C,shuffle(A,B)) commutted patterns as well.
22235  for (int i = 0; i != 2; ++i) {
22236  if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
22237  N->isOnlyUserOf(N->getOperand(i).getNode())) {
22238  // The incoming shuffle must be of the same type as the result of the
22239  // current shuffle.
22240  auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
22241  assert(OtherSV->getOperand(0).getValueType() == VT &&
22242  "Shuffle types don't match");
22243 
22244  SDValue SV0, SV1;
22246  if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
22247  SV0, SV1, Mask)) {
22248  // Check if all indices in Mask are Undef. In case, propagate Undef.
22249  if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22250  return DAG.getUNDEF(VT);
22251 
22252  return DAG.getVectorShuffle(VT, SDLoc(N),
22253  SV0 ? SV0 : DAG.getUNDEF(VT),
22254  SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
22255  }
22256  }
22257  }
22258 
22259  // Merge shuffles through binops if we are able to merge it with at least
22260  // one other shuffles.
22261  // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
22262  // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
22263  unsigned SrcOpcode = N0.getOpcode();
22264  if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
22265  (N1.isUndef() ||
22266  (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
22267  // Get binop source ops, or just pass on the undef.
22268  SDValue Op00 = N0.getOperand(0);
22269  SDValue Op01 = N0.getOperand(1);
22270  SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
22271  SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
22272  // TODO: We might be able to relax the VT check but we don't currently
22273  // have any isBinOp() that has different result/ops VTs so play safe until
22274  // we have test coverage.
22275  if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
22276  Op01.getValueType() == VT && Op11.getValueType() == VT &&
22277  (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
22278  Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
22279  Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
22280  Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
22281  auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
22282  SmallVectorImpl<int> &Mask, bool LeftOp,
22283  bool Commute) {
22284  SDValue InnerN = Commute ? N1 : N0;
22285  SDValue Op0 = LeftOp ? Op00 : Op01;
22286  SDValue Op1 = LeftOp ? Op10 : Op11;
22287  if (Commute)
22288  std::swap(Op0, Op1);
22289  // Only accept the merged shuffle if we don't introduce undef elements,
22290  // or the inner shuffle already contained undef elements.
22291  auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
22292  return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
22293  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
22294  Mask) &&
22295  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
22296  llvm::none_of(Mask, [](int M) { return M < 0; }));
22297  };
22298 
22299  // Ensure we don't increase the number of shuffles - we must merge a
22300  // shuffle from at least one of the LHS and RHS ops.
22301  bool MergedLeft = false;
22302  SDValue LeftSV0, LeftSV1;
22303  SmallVector<int, 4> LeftMask;
22304  if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
22305  CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
22306  MergedLeft = true;
22307  } else {
22308  LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22309  LeftSV0 = Op00, LeftSV1 = Op10;
22310  }
22311 
22312  bool MergedRight = false;
22313  SDValue RightSV0, RightSV1;
22314  SmallVector<int, 4> RightMask;
22315  if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
22316  CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
22317  MergedRight = true;
22318  } else {
22319  RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22320  RightSV0 = Op01, RightSV1 = Op11;
22321  }
22322 
22323  if (MergedLeft || MergedRight) {
22324  SDLoc DL(N);
22326  VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
22327  LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
22329  VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
22330  RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
22331  return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
22332  }
22333  }
22334  }
22335  }
22336 
22337  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
22338  return V;
22339 
22340  return SDValue();
22341 }
22342 
/// Combine a SCALAR_TO_VECTOR node: when the scalar operand was itself
/// extracted from a vector, re-express the whole round trip as a vector
/// shuffle (with a scalar truncate or an EXTRACT_SUBVECTOR where the types
/// require it) so the value never has to leave the vector unit.
22343 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
22344  SDValue InVal = N->getOperand(0);
22345  EVT VT = N->getValueType(0);
22346 
22347  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
22348  // with a VECTOR_SHUFFLE and possible truncate.
22349  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22350  VT.isFixedLengthVector() &&
       // NOTE(review): original line 22351 was elided by the doc rendering; it
       // completes this if-condition (presumably a check on the extract's
       // source vector type) — confirm against the full source file.
22352  SDValue InVec = InVal->getOperand(0);
22353  SDValue EltNo = InVal->getOperand(1);
22354  auto InVecT = InVec.getValueType();
       // Only a constant extract index can be turned into a shuffle mask.
22355  if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
       // Mask selects the extracted element into lane 0; all other lanes
       // are undef (-1).
22356  SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
22357  int Elt = C0->getZExtValue();
22358  NewMask[0] = Elt;
22359  // If we have an implict truncate do truncate here as long as it's legal.
22360  // if it's not legal, this should
22361  if (VT.getScalarType() != InVal.getValueType() &&
22362  InVal.getValueType().isScalarInteger() &&
22363  isTypeLegal(VT.getScalarType())) {
22364  SDValue Val =
22365  DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
22366  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
22367  }
       // Same element type and the source vector is at least as wide: try to
       // build a target-legal shuffle of the source with undef.
22368  if (VT.getScalarType() == InVecT.getScalarType() &&
22369  VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
22370  SDValue LegalShuffle =
22371  TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
22372  DAG.getUNDEF(InVecT), NewMask, DAG);
22373  if (LegalShuffle) {
22374  // If the initial vector is the correct size this shuffle is a
22375  // valid result.
22376  if (VT == InVecT)
22377  return LegalShuffle;
22378  // If not we must truncate the vector.
22379  if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
22380  SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
22381  EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
22382  InVecT.getVectorElementType(),
22383  VT.getVectorNumElements());
       // Take the low VT-sized subvector of the (wider) shuffle result.
22384  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
22385  LegalShuffle, ZeroIdx);
22386  }
22387  }
22388  }
22389  }
22390  }
22391 
22392  return SDValue();
22393 }
22394 
/// Combine an INSERT_SUBVECTOR node. Folds include: dropping undef inserts,
/// collapsing extract/insert round trips, pushing bitcasts through the
/// insert, merging chained inserts at the same index, canonicalizing insert
/// order, and rewriting inserts into CONCAT_VECTORS as a new concat.
22395 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
22396  EVT VT = N->getValueType(0);
22397  SDValue N0 = N->getOperand(0);
22398  SDValue N1 = N->getOperand(1);
22399  SDValue N2 = N->getOperand(2);
       // Insert index; operand 2 of INSERT_SUBVECTOR is always a constant here.
22400  uint64_t InsIdx = N->getConstantOperandVal(2);
22401 
22402  // If inserting an UNDEF, just return the original vector.
22403  if (N1.isUndef())
22404  return N0;
22405 
22406  // If this is an insert of an extracted vector into an undef vector, we can
22407  // just use the input to the extract.
22408  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22409  N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
22410  return N1.getOperand(0);
22411 
22412  // If we are inserting a bitcast value into an undef, with the same
22413  // number of elements, just use the bitcast input of the extract.
22414  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
22415  // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
22416  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
       // NOTE(review): original lines 22417, 22419 and 22421 were elided by
       // the doc rendering; they complete this condition (presumably checking
       // that the bitcast source is an EXTRACT_SUBVECTOR and comparing its
       // element count / size against VT) — confirm against the full source.
22418  N1.getOperand(0).getOperand(1) == N2 &&
22420  VT.getVectorElementCount() &&
22422  VT.getSizeInBits()) {
22423  return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
22424  }
22425 
22426  // If both N1 and N2 are bitcast values on which insert_subvector
22427  // would makes sense, pull the bitcast through.
22428  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
22429  // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
22430  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
22431  SDValue CN0 = N0.getOperand(0);
22432  SDValue CN1 = N1.getOperand(0);
22433  EVT CN0VT = CN0.getValueType();
22434  EVT CN1VT = CN1.getValueType();
       // The pre-bitcast types must agree on element type, and the outer
       // vector must keep its element count, for the insert to stay valid.
22435  if (CN0VT.isVector() && CN1VT.isVector() &&
22436  CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
22437  CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
22438  SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
22439  CN0.getValueType(), CN0, CN1, N2);
22440  return DAG.getBitcast(VT, NewINSERT);
22441  }
22442  }
22443 
22444  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
22445  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
22446  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
22447  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22448  N0.getOperand(1).getValueType() == N1.getValueType() &&
22449  N0.getOperand(2) == N2)
22450  return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
22451  N1, N2);
22452 
22453  // Eliminate an intermediate insert into an undef vector:
22454  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
22455  // insert_subvector undef, X, N2
22456  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
22457  N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
22458  return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
22459  N1.getOperand(1), N2);
22460 
22461  // Push subvector bitcasts to the output, adjusting the index as we go.
22462  // insert_subvector(bitcast(v), bitcast(s), c1)
22463  // -> bitcast(insert_subvector(v, s, c2))
22464  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
22465  N1.getOpcode() == ISD::BITCAST) {
22466  SDValue N0Src = peekThroughBitcasts(N0);
22467  SDValue N1Src = peekThroughBitcasts(N1);
22468  EVT N0SrcSVT = N0Src.getValueType().getScalarType();
22469  EVT N1SrcSVT = N1Src.getValueType().getScalarType();
22470  if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
22471  N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
22472  EVT NewVT;
22473  SDLoc DL(N);
22474  SDValue NewIdx;
22475  LLVMContext &Ctx = *DAG.getContext();
22476  ElementCount NumElts = VT.getVectorElementCount();
22477  unsigned EltSizeInBits = VT.getScalarSizeInBits();
       // Widening case: outer elements are a whole multiple of the source
       // scalar size, so scale up the element count and insert index.
22478  if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
22479  unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
22480  NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
22481  NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
       // Narrowing case: only legal when both the element count and the
       // insert index divide evenly by the scale factor.
22482  } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
22483  unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
22484  if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
22485  NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
22486  NumElts.divideCoefficientBy(Scale));
22487  NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
22488  }
22489  }
22490  if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
22491  SDValue Res = DAG.getBitcast(NewVT, N0Src);
22492  Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
22493  return DAG.getBitcast(VT, Res);
22494  }
22495  }
22496  }
22497 
22498  // Canonicalize insert_subvector dag nodes.
22499  // Example:
22500  // (insert_subvector (insert_subvector A, Idx0), Idx1)
22501  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
22502  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
22503  N1.getValueType() == N0.getOperand(1).getValueType()) {
22504  unsigned OtherIdx = N0.getConstantOperandVal(2);
       // Only swap when this insert's index is strictly smaller, so repeated
       // combining terminates with inserts sorted by descending index.
22505  if (InsIdx < OtherIdx) {
22506  // Swap nodes.
22507  SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
22508  N0.getOperand(0), N1, N2);
22509  AddToWorklist(NewOp.getNode());
22510  return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
22511  VT, NewOp, N0.getOperand(1), N0.getOperand(2));
22512  }
22513  }
22514 
22515  // If the input vector is a concatenation, and the insert replaces
22516  // one of the pieces, we can optimize into a single concat_vectors.
22517  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
22518  N0.getOperand(0).getValueType() == N1.getValueType() &&
       // NOTE(review): original line 22519 was elided by the doc rendering;
       // it contributes another clause of this condition — confirm against
       // the full source.
22520  N1.getValueType().isScalableVector()) {
22521  unsigned Factor = N1.getValueType().getVectorMinNumElements();
22522  SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
22523  Ops[InsIdx / Factor] = N1;
22524  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22525  }
22526 
22527  // Simplify source operands based on insertion.
       // NOTE(review): original line 22528 was elided by the doc rendering;
       // it is the call whose success this `return SDValue(N, 0)` reports
       // (presumably SimplifyDemandedVectorElts on this node) — confirm
       // against the full source.
22529  return SDValue(N, 0);
22530 
22531  return SDValue();
22532 }
22533 
22534 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
22535  SDValue N0 = N->getOperand(0);
22536 
22537  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
22538  if (N0->getOpcode() == ISD::FP16_TO_FP)
22539  return N0->getOperand(0);
22540 
22541  return SDValue();
22542 }
22543 
/// Combine an FP16_TO_FP node.
22544 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
22545  SDValue N0 = N->getOperand(0);
22546 
22547  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
       // FP16_TO_FP only consumes the low 16 bits of its integer operand, so
       // a preceding mask with 0xffff is redundant unless the target asked to
       // keep it (shouldKeepZExtForFP16Conv).
22548  if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
       // NOTE(review): original line 22549 was elided by the doc rendering;
       // it defines AndConst, used below (presumably the constant operand of
       // the AND) — confirm against the full source.
22550  if (AndConst && AndConst->getAPIntValue() == 0xffff) {
22551  return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
22552  N0.getOperand(0));
22553  }
22554  }
22555 
22556  return SDValue();
22557 }
22558 
/// Combine a VECREDUCE_* node: a reduction of a single-element vector is an
/// extract, and boolean and/or reductions may be rewritten as umin/umax
/// reductions when only the latter are legal.
22559 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
22560  SDValue N0 = N->getOperand(0);
22561  EVT VT = N0.getValueType();
22562  unsigned Opcode = N->getOpcode();
22563 
22564  // VECREDUCE over 1-element vector is just an extract.
22565  if (VT.getVectorElementCount().isScalar()) {
22566  SDLoc dl(N);
22567  SDValue Res =
       // NOTE(review): original line 22568 was elided by the doc rendering;
       // it is the start of the call completed below (presumably an
       // EXTRACT_VECTOR_ELT of N0 at index 0) — confirm against the full
       // source.
22569  DAG.getVectorIdxConstant(0, dl));
       // The extract yields the element type; widen with ANY_EXTEND when the
       // reduction's declared result type is larger.
22570  if (Res.getValueType() != N->getValueType(0))
22571  Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22572  return Res;
22573  }
22574 
22575  // On an boolean vector an and/or reduction is the same as a umin/umax
22576  // reduction. Convert them if the latter is legal while the former isn't.
22577  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22578  unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
       // NOTE(review): original line 22579 was elided by the doc rendering;
       // it completes this conditional expression (presumably selecting
       // VECREDUCE_UMIN vs VECREDUCE_UMAX) — confirm against the full source.
       // The sign-bit check below ensures every element is all-zeros or
       // all-ones, i.e. a genuine boolean vector.
22580  if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22581  TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22582  DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
22583  return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22584  }
22585 
22586  return SDValue();
22587 }
22588 
22589 SDValue DAGCombiner::visitVPOp(SDNode *N) {
22590  // VP operations in which all vector elements are disabled - either by
22591  // determining that the mask is all false or that the EVL is 0 - can be
22592  // eliminated.
22593  bool AreAllEltsDisabled = false;
22594  if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
22595  AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
22596  if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
22597  AreAllEltsDisabled |=
22598  ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
22599 
22600  // This is the only generic VP combine we support for now.
22601  if (!AreAllEltsDisabled)
22602  return SDValue();
22603 
22604  // Binary operations can be replaced by UNDEF.
22605  if (ISD::isVPBinaryOp(N->getOpcode()))
22606  return DAG.getUNDEF(N->getValueType(0));
22607 
22608  // VP Memory operations can be replaced by either the chain (stores) or the
22609  // chain + undef (loads).
22610  if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
22611  if (MemSD->writeMem())
22612  return MemSD->getChain();
22613  return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
22614  }
22615 
22616  // Reduction operations return the start operand when no elements are active.
22617  if (ISD::isVPReduction(N->getOpcode()))
22618  return N->getOperand(0);
22619 
22620  return SDValue();
22621 }
22622 
22623 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
22624 /// with the destination vector and a zero vector.
22625 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22626 /// vector_shuffle V, Zero, <0, 4, 2, 4>
22627 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22628  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22629 
22630  EVT VT = N->getValueType(0);
22631  SDValue LHS = N->getOperand(0);
22632  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22633  SDLoc DL(N);
22634 
22635  // Make sure we're not running after operation legalization where it
22636  // may have custom lowered the vector shuffles.
22637  if (LegalOperations)
22638  return SDValue();
22639 
22640  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22641  return SDValue();
22642 
22643  EVT RVT = RHS.getValueType();
22644  unsigned NumElts = RHS.getNumOperands();
22645 
22646  // Attempt to create a valid clear mask, splitting the mask into
22647  // sub elements and checking to see if each is
22648  // all zeros or all ones - suitable for shuffle masking.
22649  auto BuildClearMask = [&](int Split) {
22650  int NumSubElts = NumElts * Split;
22651  int NumSubBits = RVT.getScalarSizeInBits() / Split;
22652 
22653  SmallVector<int, 8> Indices;
22654  for (int i = 0; i != NumSubElts; ++i) {
22655  int EltIdx = i / Split;
22656  int SubIdx = i % Split;
22657  SDValue Elt = RHS.getOperand(EltIdx);
22658  // X & undef --> 0 (not undef). So this lane must be converted to choose
22659  // from the zero constant vector (same as if the element had all 0-bits).
22660  if (Elt.isUndef()) {
22661  Indices.push_back(i + NumSubElts);
22662  continue;
22663  }
22664 
22665  APInt Bits;
22666  if (isa<ConstantSDNode>(Elt))
22667  Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22668  else if (isa<ConstantFPSDNode>(Elt))
22669  Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22670  else
22671  return SDValue();
22672 
22673  // Extract the sub element from the constant bit mask.
22674  if (DAG.getDataLayout().isBigEndian())
22675  Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22676  else
22677  Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22678 
22679  if (Bits.isAllOnes())
22680  Indices.push_back(i);
22681  else if (Bits == 0)
22682  Indices.push_back(i + NumSubElts);
22683  else
22684  return SDValue();
22685  }
22686 
22687  // Let's see if the target supports this vector_shuffle.
22688  EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22689  EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22690  if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22691  return SDValue();
22692 
22693  SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22694  return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22695  DAG.getBitcast(ClearVT, LHS),
22696  Zero, Indices));
22697  };
22698 
22699  // Determine maximum split level (byte level masking).
22700  int MaxSplit = 1;
22701  if (RVT.getScalarSizeInBits() % 8 == 0)
22702  MaxSplit = RVT.getScalarSizeInBits() / 8;
22703 
22704  for (int Split = 1; Split <= MaxSplit; ++Split)
22705  if (RVT.getScalarSizeInBits() % Split == 0)
22706  if (SDValue S = BuildClearMask(Split))
22707  return S;
22708 
22709  return SDValue();
22710 }
22711 
22712 /// If a vector binop is performed on splat values, it may be profitable to
22713 /// extract, scalarize, and insert/splat.
// NOTE(review): the function's signature line (doxygen line 22714) was lost
// in extraction; presumably it declares
//   static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
// -- TODO confirm against the upstream file.
22715  const SDLoc &DL) {
22716  SDValue N0 = N->getOperand(0);
22717  SDValue N1 = N->getOperand(1);
22718  unsigned Opcode = N->getOpcode();
22719  EVT VT = N->getValueType(0);
22720  EVT EltVT = VT.getVectorElementType();
22721  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22722 
22723  // TODO: Remove/replace the extract cost check? If the elements are available
22724  // as scalars, then there may be no extract cost. Should we ask if
22725  // inserting a scalar back into a vector is cheap instead?
// Both operands must splat the same lane, the source element types must
// match the result element type, the extract must be cheap, and the scalar
// form of the opcode must be supported by the target; otherwise bail out.
22726  int Index0, Index1;
22727  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22728  SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
22729  if (!Src0 || !Src1 || Index0 != Index1 ||
22730  Src0.getValueType().getVectorElementType() != EltVT ||
22731  Src1.getValueType().getVectorElementType() != EltVT ||
22732  !TLI.isExtractVecEltCheap(VT, Index0) ||
22733  !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22734  return SDValue();
22735 
// Extract the splatted lane from each source and perform the operation once
// on scalars, carrying over the node's flags.
22736  SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
22737  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
22738  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
22739  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22740 
22741  // If all lanes but 1 are undefined, no need to splat the scalar result.
22742  // TODO: Keep track of undefs and use that info in the general case.
22743  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
22744  count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
22745  count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
22746  // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
22747  // build_vec ..undef, (bo X, Y), undef...
// NOTE(review): the declaration of 'Ops' (doxygen line 22748) was lost in
// extraction; presumably it copies N0's operands into a SmallVector -- TODO
// confirm against the upstream file.
22749  Ops[Index0] = ScalarBO;
22750  return DAG.getBuildVector(VT, DL, Ops);
22751  }
22752 
22753  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
22754  SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
22755  return DAG.getBuildVector(VT, DL, Ops);
22756 }
22757 
22758 /// Visit a binary vector operation, like ADD.
// Attempts a series of target-independent reassociations of a vector binop
// with shuffles, insert_subvector, concat_vectors, and splats. Returns the
// replacement value, or an empty SDValue if no fold applies.
22759 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
22760  EVT VT = N->getValueType(0);
22761  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
22762 
22763  SDValue LHS = N->getOperand(0);
22764  SDValue RHS = N->getOperand(1);
22765  unsigned Opcode = N->getOpcode();
22766  SDNodeFlags Flags = N->getFlags();
22767 
22768  // Move unary shuffles with identical masks after a vector binop:
22769  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22770  // --> shuffle (VBinOp A, B), Undef, Mask
22771  // This does not require type legality checks because we are creating the
22772  // same types of operations that are in the original sequence. We do have to
22773  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
22774  // though. This code is adapted from the identical transform in instcombine.
22775  if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22776  Opcode != ISD::UREM && Opcode != ISD::SREM &&
22777  Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22778  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22779  auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
// LHS == RHS is accepted below: squaring a shuffled value still only needs
// one shuffle after the transform even though that node has two uses here.
22780  if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22781  LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22782  (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22783  SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22784  RHS.getOperand(0), Flags);
22785  SDValue UndefV = LHS.getOperand(1);
22786  return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22787  }
22788 
22789  // Try to sink a splat shuffle after a binop with a uniform constant.
22790  // This is limited to cases where neither the shuffle nor the constant have
22791  // undefined elements because that could be poison-unsafe or inhibit
22792  // demanded elements analysis. It is further limited to not change a splat
22793  // of an inserted scalar because that may be optimized better by
22794  // load-folding or other target-specific behaviors.
22795  if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22796  Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22797  Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22798  // binop (splat X), (splat C) --> splat (binop X, C)
22799  SDValue X = Shuf0->getOperand(0);
22800  SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22801  return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22802  Shuf0->getMask());
22803  }
// Mirror image of the previous fold for a constant on the left-hand side.
22804  if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22805  Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22806  Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22807  // binop (splat C), (splat X) --> splat (binop C, X)
22808  SDValue X = Shuf1->getOperand(0);
22809  SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22810  return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22811  Shuf1->getMask());
22812  }
22813  }
22814 
22815  // The following pattern is likely to emerge with vector reduction ops. Moving
22816  // the binary operation ahead of insertion may allow using a narrower vector
22817  // instruction that has better performance than the wide version of the op:
22818  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22819  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22820  RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22821  LHS.getOperand(2) == RHS.getOperand(2) &&
22822  (LHS.hasOneUse() || RHS.hasOneUse())) {
22823  SDValue X = LHS.getOperand(1);
22824  SDValue Y = RHS.getOperand(1);
22825  SDValue Z = LHS.getOperand(2);
22826  EVT NarrowVT = X.getValueType();
22827  if (NarrowVT == Y.getValueType() &&
22828  TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22829  LegalOperations)) {
22830  // (binop undef, undef) may not return undef, so compute that result.
22831  SDValue VecC =
22832  DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT))
22833  SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22834  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22835  }
22836  }
22837 
22838  // Make sure all but the first op are undef or constant.
22839  auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22840  return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22841  all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22842  return Op.isUndef() ||
22843  ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22844  });
22845  };
22846 
22847  // The following pattern is likely to emerge with vector reduction ops. Moving
22848  // the binary operation ahead of the concat may allow using a narrower vector
22849  // instruction that has better performance than the wide version of the op:
22850  // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22851  // concat (VBinOp X, Y), VecC
22852  if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22853  (LHS.hasOneUse() || RHS.hasOneUse())) {
22854  EVT NarrowVT = LHS.getOperand(0).getValueType();
22855  if (NarrowVT == RHS.getOperand(0).getValueType() &&
22856  TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22857  unsigned NumOperands = LHS.getNumOperands();
22858  SmallVector<SDValue, 4> ConcatOps;
22859  for (unsigned i = 0; i != NumOperands; ++i) {
22860  // This constant fold for operands 1 and up.
22861  ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22862  RHS.getOperand(i)));
22863  }
22864 
22865  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22866  }
22867  }
22868 
// Last resort: if both operands splat the same lane, do the op on scalars.
22869  if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
22870  return V;
22871 
22872  return SDValue();
22873 }
22874 
22875 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22876  SDValue N2) {
22877  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
22878 
22879  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22880  cast<CondCodeSDNode>(N0.getOperand(2))->get());
22881 
22882  // If we got a simplified select_cc node back from SimplifySelectCC, then
22883  // break it down into a new SETCC node, and a new SELECT node, and then return
22884  // the SELECT node, since we were called with a SELECT node.
22885  if (SCC.getNode()) {
22886  // Check to see if we got a select_cc back (to turn into setcc/select).
22887  // Otherwise, just return whatever node we got back, like fabs.
22888  if (SCC.getOpcode() == ISD::SELECT_CC) {
22889  const SDNodeFlags Flags = N0.getNode()->getFlags();
22890  SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22891  N0.getValueType(),
22892  SCC.getOperand(0), SCC.getOperand(1),
22893  SCC.getOperand(4), Flags);
22894  AddToWorklist(SETCC.getNode());
22895  SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22896  SCC.getOperand(2), SCC.getOperand(3));
22897  SelectNode->setFlags(Flags);
22898  return SelectNode;
22899  }
22900 
22901  return SCC;
22902  }
22903  return SDValue();
22904 }
22905 
22906 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22907 /// being selected between, see if we can simplify the select. Callers of this
22908 /// should assume that TheSelect is deleted if this returns true. As such, they
22909 /// should return the appropriate thing (e.g. the node) back to the top-level of
22910 /// the DAG combiner loop to avoid it being looked at.
22911 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22912  SDValue RHS) {
22913  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22914  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22915  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22916  if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22917  // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22918  SDValue Sqrt = RHS;
22919  ISD::CondCode CC;
22920  SDValue CmpLHS;
22921  const ConstantFPSDNode *Zero = nullptr;
22922 
// Pull the compare pieces out of either select form (SELECT_CC carries them
// inline; SELECT/VSELECT carries a SETCC as operand 0).
22923  if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22924  CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22925  CmpLHS = TheSelect->getOperand(0);
22926  Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22927  } else {
22928  // SELECT or VSELECT
22929  SDValue Cmp = TheSelect->getOperand(0);
22930  if (Cmp.getOpcode() == ISD::SETCC) {
22931  CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22932  CmpLHS = Cmp.getOperand(0);
22933  Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22934  }
22935  }
22936  if (Zero && Zero->isZero() &&
22937  Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22938  CC == ISD::SETULT || CC == ISD::SETLT)) {
22939  // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22940  CombineTo(TheSelect, Sqrt);
22941  return true;
22942  }
22943  }
22944  }
22945  // Cannot simplify select with vector condition
22946  if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22947 
22948  // If this is a select from two identical things, try to pull the operation
22949  // through the select.
22950  if (LHS.getOpcode() != RHS.getOpcode() ||
22951  !LHS.hasOneUse() || !RHS.hasOneUse())
22952  return false;
22953 
22954  // If this is a load and the token chain is identical, replace the select
22955  // of two loads with a load through a select of the address to load from.
22956  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22957  // constants have been dropped into the constant pool.
22958  if (LHS.getOpcode() == ISD::LOAD) {
22959  LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22960  LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22961 
22962  // Token chains must be identical.
22963  if (LHS.getOperand(0) != RHS.getOperand(0) ||
22964  // Do not let this transformation reduce the number of volatile loads.
22965  // Be conservative for atomics for the moment
22966  // TODO: This does appear to be legal for unordered atomics (see D66309)
22967  !LLD->isSimple() || !RLD->isSimple() ||
22968  // FIXME: If either is a pre/post inc/dec load,
22969  // we'd need to split out the address adjustment.
22970  LLD->isIndexed() || RLD->isIndexed() ||
22971  // If this is an EXTLOAD, the VT's must match.
22972  LLD->getMemoryVT() != RLD->getMemoryVT() ||
22973  // If this is an EXTLOAD, the kind of extension must match.
22974  (LLD->getExtensionType() != RLD->getExtensionType() &&
22975  // The only exception is if one of the extensions is anyext.
22976  LLD->getExtensionType() != ISD::EXTLOAD &&
22977  RLD->getExtensionType() != ISD::EXTLOAD) ||
22978  // FIXME: this discards src value information. This is
22979  // over-conservative. It would be beneficial to be able to remember
22980  // both potential memory locations. Since we are discarding
22981  // src value info, don't do the transformation if the memory
22982  // locations are not in the default address space.
22983  LLD->getPointerInfo().getAddrSpace() != 0 ||
22984  RLD->getPointerInfo().getAddrSpace() != 0 ||
22985  // We can't produce a CMOV of a TargetFrameIndex since we won't
22986  // generate the address generation required.
22987  LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
// NOTE(review): a condition line (doxygen line 22988) was lost in extraction
// here; presumably the symmetric check
//   RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
// -- TODO confirm against the upstream file.
22989  !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22990  LLD->getBasePtr().getValueType()))
22991  return false;
22992 
22993  // The loads must not depend on one another.
22994  if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22995  return false;
22996 
22997  // Check that the select condition doesn't reach either load. If so,
22998  // folding this will induce a cycle into the DAG. If not, this is safe to
22999  // xform, so create a select of the addresses.
23000 
// NOTE(review): the declarations of 'Visited' and 'Worklist' (doxygen lines
// 23001-23002) were lost in extraction; presumably they are a SmallPtrSet
// and a SmallVector of SDNode pointers used by hasPredecessorHelper below --
// TODO confirm against the upstream file.
23003 
23004  // Always fail if LLD and RLD are not independent. TheSelect is a
23005  // predecessor to all Nodes in question so we need not search past it.
23006 
23007  Visited.insert(TheSelect);
23008  Worklist.push_back(LLD);
23009  Worklist.push_back(RLD);
23010 
23011  if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
23012  SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
23013  return false;
23014 
23015  SDValue Addr;
23016  if (TheSelect->getOpcode() == ISD::SELECT) {
23017  // We cannot do this optimization if any pair of {RLD, LLD} is a
23018  // predecessor to {RLD, LLD, CondNode}. As we've already compared the
23019  // Loads, we only need to check if CondNode is a successor to one of the
23020  // loads. We can further avoid this if there's no use of their chain
23021  // value.
23022  SDNode *CondNode = TheSelect->getOperand(0).getNode();
23023  Worklist.push_back(CondNode);
23024 
23025  if ((LLD->hasAnyUseOfValue(1) &&
23026  SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23027  (RLD->hasAnyUseOfValue(1) &&
23028  SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23029  return false;
23030 
23031  Addr = DAG.getSelect(SDLoc(TheSelect),
23032  LLD->getBasePtr().getValueType(),
23033  TheSelect->getOperand(0), LLD->getBasePtr(),
23034  RLD->getBasePtr());
23035  } else { // Otherwise SELECT_CC
23036  // We cannot do this optimization if any pair of {RLD, LLD} is a
23037  // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
23038  // the Loads, we only need to check if CondLHS/CondRHS is a successor to
23039  // one of the loads. We can further avoid this if there's no use of their
23040  // chain value.
23041 
23042  SDNode *CondLHS = TheSelect->getOperand(0).getNode();
23043  SDNode *CondRHS = TheSelect->getOperand(1).getNode();
23044  Worklist.push_back(CondLHS);
23045  Worklist.push_back(CondRHS);
23046 
23047  if ((LLD->hasAnyUseOfValue(1) &&
23048  SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23049  (RLD->hasAnyUseOfValue(1) &&
23050  SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23051  return false;
23052 
23053  Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
23054  LLD->getBasePtr().getValueType(),
23055  TheSelect->getOperand(0),
23056  TheSelect->getOperand(1),
23057  LLD->getBasePtr(), RLD->getBasePtr(),
23058  TheSelect->getOperand(4));
23059  }
23060 
23061  SDValue Load;
23062  // It is safe to replace the two loads if they have different alignments,
23063  // but the new load must be the minimum (most restrictive) alignment of the
23064  // inputs.
23065  Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
// Flags that hold for only one side cannot be claimed for the merged load.
23066  MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
23067  if (!RLD->isInvariant())
23068  MMOFlags &= ~MachineMemOperand::MOInvariant;
23069  if (!RLD->isDereferenceable())
23070  MMOFlags &= ~MachineMemOperand::MODereferenceable;
23071  if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
23072  // FIXME: Discards pointer and AA info.
23073  Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
23074  LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
23075  MMOFlags);
23076  } else {
23077  // FIXME: Discards pointer and AA info.
// If one side is anyext, use the other side's (stricter) extension kind.
23078  Load = DAG.getExtLoad(
23079  LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
23080  : LLD->getExtensionType(),
23081  SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
23082  MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
23083  }
23084 
23085  // Users of the select now use the result of the load.
23086  CombineTo(TheSelect, Load);
23087 
23088  // Users of the old loads now use the new load's chain. We know the
23089  // old-load value is dead now.
23090  CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
23091  CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
23092  return true;
23093  }
23094 
23095  return false;
23096 }
23097 
23098 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
23099 /// bitwise 'and'.
23100 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
23101  SDValue N1, SDValue N2, SDValue N3,
23102  ISD::CondCode CC) {
23103  // If this is a select where the false operand is zero and the compare is a
23104  // check of the sign bit, see if we can perform the "gzip trick":
23105  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
23106  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
23107  EVT XType = N0.getValueType();
23108  EVT AType = N2.getValueType();
23109  if (!isNullConstant(N3) || !XType.bitsGE(AType))
23110  return SDValue();
23111 
23112  // If the comparison is testing for a positive value, we have to invert
23113  // the sign bit mask, so only do that transform if the target has a bitwise
23114  // 'and not' instruction (the invert is free).
23115  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
23116  // (X > -1) ? A : 0
23117  // (X > 0) ? X : 0 <-- This is canonical signed max.
23118  if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
23119  return SDValue();
23120  } else if (CC == ISD::SETLT) {
23121  // (X < 0) ? A : 0
23122  // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
23123  if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
23124  return SDValue();
23125  } else {
// Only the sign-bit style comparisons above are handled.
23126  return SDValue();
23127  }
23128 
23129  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
23130  // constant.
23131  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
23132  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
// (x & (x - 1)) == 0 tests that N2 has at most one bit set; then a logical
// shift right can move the sign bit directly onto that single bit position.
23133  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
23134  unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
23135  if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23136  SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23137  SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23138  AddToWorklist(Shift.getNode());
23139 
23140  if (XType.bitsGT(AType)) {
23141  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23142  AddToWorklist(Shift.getNode());
23143  }
23144 
23145  if (CC == ISD::SETGT)
23146  Shift = DAG.getNOT(DL, Shift, AType);
23147 
23148  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23149  }
23150  }
23151 
// General case: an arithmetic shift right by size(X)-1 broadcasts the sign
// bit, producing an all-ones or all-zero mask to AND with N2.
23152  unsigned ShCt = XType.getSizeInBits() - 1;
23153  if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23154  return SDValue();
23155 
23156  SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23157  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23158  AddToWorklist(Shift.getNode());
23159 
23160  if (XType.bitsGT(AType)) {
23161  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23162  AddToWorklist(Shift.getNode());
23163  }
23164 
// For the SETGT form the mask is inverted (see "gzip trick" above).
23165  if (CC == ISD::SETGT)
23166  Shift = DAG.getNOT(DL, Shift, AType);
23167 
23168  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23169 }
23170 
23171 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
23172 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23173  SDValue N0 = N->getOperand(0);
23174  SDValue N1 = N->getOperand(1);
23175  SDValue N2 = N->getOperand(2);
23176  EVT VT = N->getValueType(0);
23177  SDLoc DL(N);
23178 
23179  unsigned BinOpc = N1.getOpcode();
23180  if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23181  return SDValue();
23182 
23183  // The use checks are intentionally on SDNode because we may be dealing
23184  // with opcodes that produce more than one SDValue.
23185  // TODO: Do we really need to check N0 (the condition operand of the select)?
23186  // But removing that clause could cause an infinite loop...
23187  if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23188  return SDValue();
23189 
23190  // Binops may include opcodes that return multiple values, so all values
23191  // must be created/propagated from the newly created binops below.
23192  SDVTList OpVTs = N1->getVTList();
23193 
23194  // Fold select(cond, binop(x, y), binop(z, y))
23195  // --> binop(select(cond, x, z), y)
23196  if (N1.getOperand(1) == N2.getOperand(1)) {
23197  SDValue NewSel =
23198  DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23199  SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
23200  NewBinOp->setFlags(N1->getFlags());
23201  NewBinOp->intersectFlagsWith(N2->getFlags());
23202  return NewBinOp;
23203  }
23204 
23205  // Fold select(cond, binop(x, y), binop(x, z))
23206  // --> binop(x, select(cond, y, z))
23207  // Second op VT might be different (e.g. shift amount type)
23208  if (N1.getOperand(0) == N2.getOperand(0) &&
23209  VT == N1.getOperand(1).getValueType() &&
23210  VT == N2.getOperand(1).getValueType()) {
23211  SDValue NewSel =
23212  DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
23213  SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
23214  NewBinOp->setFlags(N1->getFlags());
23215  NewBinOp->intersectFlagsWith(N2->getFlags());
23216  return NewBinOp;
23217  }
23218 
23219  // TODO: Handle isCommutativeBinOp patterns as well?
23220  return SDValue();
23221 }
23222 
23223 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
// Rewrites the sign-bit manipulation in the integer domain: fneg becomes an
// XOR with the sign mask and fabs becomes an AND with its complement.
23224 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
23225  SDValue N0 = N->getOperand(0);
23226  EVT VT = N->getValueType(0);
23227  bool IsFabs = N->getOpcode() == ISD::FABS;
23228  bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
23229 
// If the FP op is already free on this target, or the operand is not a
// single-use bitcast, there is nothing to gain.
23230  if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
23231  return SDValue();
23232 
23233  SDValue Int = N0.getOperand(0);
23234  EVT IntVT = Int.getValueType();
23235 
23236  // The operand to cast should be integer.
23237  if (!IntVT.isInteger() || IntVT.isVector())
23238  return SDValue();
23239 
23240  // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
23241  // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
23242  APInt SignMask;
23243  if (N0.getValueType().isVector()) {
23244  // For vector, create a sign mask (0x80...) or its inverse (for fabs,
23245  // 0x7f...) per element and splat it.
// NOTE(review): the line initializing SignMask for the vector path (doxygen
// line 23246) was lost in extraction; presumably it is
//   SignMask = APInt::getSignMask(N0.getValueType().getScalarSizeInBits());
// -- TODO confirm against the upstream file.
23247  if (IsFabs)
23248  SignMask = ~SignMask;
23249  SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
23250  } else {
23251  // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
23252  SignMask = APInt::getSignMask(IntVT.getSizeInBits());
23253  if (IsFabs)
23254  SignMask = ~SignMask;
23255  }
23256  SDLoc DL(N0);
// AND clears the sign bit(s) (fabs); XOR flips them (fneg).
23257  Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
23258  DAG.getConstant(SignMask, DL, IntVT));
23259  AddToWorklist(Int.getNode());
23260  return DAG.getBitcast(VT, Int);
23261 }
23262 
23263 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
23264 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
23265 /// in it. This may be a win when the constant is not otherwise available
23266 /// because it replaces two constant pool loads with one.
23267 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
23268  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
23269  ISD::CondCode CC) {
// The target must opt in to this transform for the comparison type.
23270  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
23271  return SDValue();
23272 
23273  // If we are before legalize types, we want the other legalization to happen
23274  // first (for example, to avoid messing with soft float).
23275  auto *TV = dyn_cast<ConstantFPSDNode>(N2);
23276  auto *FV = dyn_cast<ConstantFPSDNode>(N3);
23277  EVT VT = N2.getValueType();
23278  if (!TV || !FV || !TLI.isTypeLegal(VT))
23279  return SDValue();
23280 
23281  // If a constant can be materialized without loads, this does not make sense.
23282  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
23283  TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
23284  TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
23285  return SDValue();
23286 
23287  // If both constants have multiple uses, then we won't need to do an extra
23288  // load. The values are likely around in registers for other users.
23289  if (!TV->hasOneUse() && !FV->hasOneUse())
23290  return SDValue();
23291 
// Element 0 of the pool array holds the false value and element 1 the true
// value, so the select below can compute the byte offset as 0 or EltSize.
23292  Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
23293  const_cast<ConstantFP*>(TV->getConstantFPValue()) };
23294  Type *FPTy = Elts[0]->getType();
23295  const DataLayout &TD = DAG.getDataLayout();
23296 
23297  // Create a ConstantArray of the two constants.
23298  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
23299  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
23300  TD.getPrefTypeAlign(FPTy));
23301  Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
23302 
23303  // Get offsets to the 0 and 1 elements of the array, so we can select between
23304  // them.
23305  SDValue Zero = DAG.getIntPtrConstant(0, DL);
23306  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
23307  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
23308  SDValue Cond =
23309  DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
23310  AddToWorklist(Cond.getNode());
23311  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
23312  AddToWorklist(CstOffset.getNode());
23313  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
23314  AddToWorklist(CPIdx.getNode());
23315  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
// NOTE(review): an argument line (doxygen line 23316) was lost in extraction
// here; presumably it passes MachinePointerInfo::getConstantPool( for the
// load's pointer info -- TODO confirm against the upstream file.
23317  DAG.getMachineFunction()), Alignment);
23318 }
23319 
23320 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
23321 /// where 'cond' is the comparison specified by CC.
23322 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
23323  SDValue N2, SDValue N3, ISD::CondCode CC,
23324  bool NotExtCompare) {
23325  // (x ? y : y) -> y.
23326  if (N2 == N3) return N2;
23327 
23328  EVT CmpOpVT = N0.getValueType();
23329  EVT CmpResVT = getSetCCResultType(CmpOpVT);
23330  EVT VT = N2.getValueType();
23331  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
23332  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23333  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
23334 
23335  // Determine if the condition we're dealing with is constant.
23336  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
23337  AddToWorklist(SCC.getNode());
23338  if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
23339  // fold select_cc true, x, y -> x
23340  // fold select_cc false, x, y -> y
23341  return !(SCCC->isZero()) ? N2 : N3;
23342  }
23343  }
23344 
23345  if (SDValue V =
23346  convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
23347  return V;
23348 
23349  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
23350  return V;
23351 
23352  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
23353  // where y is has a single bit set.
23354  // A plaintext description would be, we can turn the SELECT_CC into an AND
23355  // when the condition can be materialized as an all-ones register. Any
23356  // single bit-test can be materialized as an all-ones register with
23357  // shift-left and shift-right-arith.
23358  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
23359  N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
23360  SDValue AndLHS = N0->getOperand(0);
23361  auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
23362  if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
23363  // Shift the tested bit over the sign bit.
23364  const APInt &AndMask = ConstAndRHS->getAPIntValue();
23365  unsigned ShCt = AndMask.getBitWidth() - 1;
23366  if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
23367  SDValue ShlAmt =
23368  DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
23369  getShiftAmountTy(AndLHS.getValueType()));
23370  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
23371 
23372  // Now arithmetic right shift it all the way over, so the result is
23373  // either all-ones, or zero.
23374  SDValue ShrAmt =
23375  DAG.getConstant(ShCt, SDLoc(Shl),
23377  SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
23378 
23379  return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
23380  }
23381  }
23382  }
23383 
23384  // fold select C, 16, 0 -> shl C, 4
23385  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
23386  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
23387 
23388  if ((Fold || Swap) &&
23389  TLI.getBooleanContents(CmpOpVT) ==
23391  (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
23392 
23393  if (Swap) {
23394  CC = ISD::getSetCCInverse(CC, CmpOpVT);
23395  std::swap(N2C, N3C);
23396  }
23397 
23398  // If the caller doesn't want us to simplify this into a zext of a compare,
23399  // don't do it.
23400  if (NotExtCompare && N2C->isOne())
23401  return SDValue();
23402 
23403  SDValue Temp, SCC;
23404  // zext (setcc n0, n1)
23405  if (LegalTypes) {
23406  SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
23407  if (VT.bitsLT(SCC.getValueType()))
23408  Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
23409  else
23410  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23411  } else {
23412  SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
23413  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23414  }
23415 
23416  AddToWorklist(SCC.getNode());
23417  AddToWorklist(Temp.getNode());
23418 
23419  if (N2C->isOne())
23420  return Temp;
23421 
23422  unsigned ShCt = N2C->getAPIntValue().logBase2();
23423  if (TLI.shouldAvoidTransformToShift(VT, ShCt))
23424  return SDValue();
23425 
23426  // shl setcc result by log2 n2c
23427  return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
23428  DAG.getConstant(ShCt, SDLoc(Temp),
23429  getShiftAmountTy(Temp.getValueType())));
23430  }
23431 
23432  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
23433  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
23434  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
23435  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
23436  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
23437  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
23438  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
23439  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
23440  if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
23441  SDValue ValueOnZero = N2;
23442  SDValue Count = N3;
23443  // If the condition is NE instead of E, swap the operands.
23444  if (CC == ISD::SETNE)
23445  std::swap(ValueOnZero, Count);
23446  // Check if the value on zero is a constant equal to the bits in the type.
23447  if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
23448  if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
23449  // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
23450  // legal, combine to just cttz.
23451  if ((Count.getOpcode() == ISD::CTTZ ||
23452  Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
23453  N0 == Count.getOperand(0) &&
23454  (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
23455  return DAG.getNode(ISD::CTTZ, DL, VT, N0);
23456  // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
23457  // legal, combine to just ctlz.
23458  if ((Count.getOpcode() == ISD::CTLZ ||
23459  Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
23460  N0 == Count.getOperand(0) &&
23461  (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
23462  return DAG.getNode(ISD::CTLZ, DL, VT, N0);
23463  }
23464  }
23465  }
23466 
23467  // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
23468  // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
23469  if (!NotExtCompare && N1C && N2C && N3C &&
23470  N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
23471  ((N1C->isAllOnes() && CC == ISD::SETGT) ||
23472  (N1C->isZero() && CC == ISD::SETLT)) &&
23473  !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
23474  SDValue ASR = DAG.getNode(
23475  ISD::SRA, DL, CmpOpVT, N0,
23476  DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
23477  return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
23478  DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
23479  }
23480 
23481  if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23482  return S;
23483  if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23484  return S;
23485 
23486  return SDValue();
23487 }
23488 
23489 /// This is a stub for TargetLowering::SimplifySetCC.
23490 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
23491  ISD::CondCode Cond, const SDLoc &DL,
23492  bool foldBooleans) {
23494  DagCombineInfo(DAG, Level, false, this);
23495  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
23496 }
23497 
23498 /// Given an ISD::SDIV node expressing a divide by constant, return
23499 /// a DAG expression to select that will generate the same value by multiplying
23500 /// by a magic number.
23501 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23502 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
23503  // when optimising for minimum size, we don't want to expand a div to a mul
23504  // and a shift.
23506  return SDValue();
23507 
23509  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
23510  for (SDNode *N : Built)
23511  AddToWorklist(N);
23512  return S;
23513  }
23514 
23515  return SDValue();
23516 }
23517 
23518 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
23519 /// DAG expression that will generate the same value by right shifting.
23520 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
23521  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
23522  if (!C)
23523  return SDValue();
23524 
23525  // Avoid division by zero.
23526  if (C->isZero())
23527  return SDValue();
23528 
23530  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
23531  for (SDNode *N : Built)
23532  AddToWorklist(N);
23533  return S;
23534  }
23535 
23536  return SDValue();
23537 }
23538 
23539 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
23540 /// expression that will generate the same value by multiplying by a magic
23541 /// number.
23542 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23543 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
23544  // when optimising for minimum size, we don't want to expand a div to a mul
23545  // and a shift.
23547  return SDValue();
23548 
23550  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
23551  for (SDNode *N : Built)
23552  AddToWorklist(N);
23553  return S;
23554  }
23555 
23556  return SDValue();
23557 }
23558 
23559 /// Determines the LogBase2 value for a non-null input value using the
23560 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
23561 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
23562  EVT VT = V.getValueType();
23563  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
23564  SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
23565  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
23566  return LogBase2;
23567 }
23568 
23569 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23570 /// For the reciprocal, we need to find the zero of the function:
23571 /// F(X) = 1/X - A [which has a zero at X = 1/A]
23572 /// =>
23573 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
23574 /// does not require additional intermediate precision]
23575 /// For the last iteration, put numerator N into it to gain more precision:
23576 /// Result = N X_i + X_i (N - N A X_i)
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
                                      SDNodeFlags Flags) {
  // Estimate expansion creates new nodes; too late once the DAG is legalized.
  if (LegalDAG)
    return SDValue();

  // TODO: Handle extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
      VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  // Ask the target for an initial estimate of 1/Op; give up if there is none.
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    SDLoc DL(Op);
    if (Iterations) {
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (N - Arg * Est)
      // If this is the last iteration, also multiply by the numerator.
      for (int i = 0; i < Iterations; ++i) {
        SDValue MulEst = Est;

        // On the final step, fold the numerator N in up front:
        // Result = N*Est + Est*(N - Op*Est) instead of Est + Est*(1 - Op*Est).
        if (i == Iterations - 1) {
          MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
          AddToWorklist(MulEst.getNode());
        }

        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT,
                             (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    } else {
      // If no iterations are available, multiply with N.
      Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
      AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}
23638 
23639 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23640 /// For the reciprocal sqrt, we need to find the zero of the function:
23641 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23642 /// =>
23643 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23644 /// As a result, we precompute A/2 prior to the iteration loop.
23645 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23646  unsigned Iterations,
23647  SDNodeFlags Flags, bool Reciprocal) {
23648  EVT VT = Arg.getValueType();
23649  SDLoc DL(Arg);
23650  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23651 
23652  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23653  // this entire sequence requires only one FP constant.
23654  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23655  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23656 
23657  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23658  for (unsigned i = 0; i < Iterations; ++i) {
23659  SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23660  NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23661  NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23662  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23663  }
23664 
23665  // If non-reciprocal square root is requested, multiply the result by Arg.
23666  if (!Reciprocal)
23667  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23668 
23669  return Est;
23670 }
23671 
23672 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23673 /// For the reciprocal sqrt, we need to find the zero of the function:
23674 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23675 /// =>
23676 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false): the sqrt result is produced only by the
  // last-iteration LHS special case inside the loop.
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression: AE is reused from the RHS above)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5; folding the extra Arg factor in here
      // turns the rsqrt refinement into sqrt on the final step.
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
  }

  return Est;
}
23713 
23714 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23715 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23716 /// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  // Estimate expansion creates new nodes; too late once the DAG is legalized.
  if (LegalDAG)
    return SDValue();

  // TODO: Handle extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
      VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  // The target chooses which Newton-Raphson variant (one- or two-constant)
  // applies to its estimate instruction.
  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations)
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
    if (!Reciprocal) {
      SDLoc DL(Op);
      // Try the target specific test first.
      SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));

      // The estimate is now completely wrong if the input was exactly 0.0 or
      // possibly a denormal. Force the answer to 0.0 or value provided by
      // target for those cases.
      Est = DAG.getNode(
          Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
          Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
    }
    return Est;
  }

  return SDValue();
}
23765 
/// Build an estimate of 1/sqrt(Op) (Reciprocal == true).
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, true);
}
23769 
/// Build an estimate of sqrt(Op) (Reciprocal == false).
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, false);
}
23773 
23774 /// Return true if there is any possibility that the two addresses overlap.
23775 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
23776 
23777  struct MemUseCharacteristics {
23778  bool IsVolatile;
23779  bool IsAtomic;
23780  SDValue BasePtr;
23781  int64_t Offset;
23782  Optional<int64_t> NumBytes;
23783  MachineMemOperand *MMO;
23784  };
23785 
23786  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23787  if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23788  int64_t Offset = 0;
23789  if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23790  Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23791  ? C->getSExtValue()
23792  : (LSN->getAddressingMode() == ISD::PRE_DEC)
23793  ? -1 * C->getSExtValue()
23794  : 0;
23795  uint64_t Size =
23796  MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23797  return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23798  Offset /*base offset*/,
23800  LSN->getMemOperand()};
23801  }
23802  if (const auto *LN = cast<LifetimeSDNode>(N))
23803  return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23804  (LN->hasOffset()) ? LN->getOffset() : 0,
23805  (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23806  : Optional<int64_t>(),
23807  (MachineMemOperand *)nullptr};
23808  // Default.
23809  return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23810  (int64_t)0 /*offset*/,
23811  Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23812  };
23813 
23814  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23815  MUC1 = getCharacteristics(Op1);
23816 
23817  // If they are to the same address, then they must be aliases.
23818  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23819  MUC0.Offset == MUC1.Offset)
23820  return true;
23821 
23822  // If they are both volatile then they cannot be reordered.
23823  if (MUC0.IsVolatile && MUC1.IsVolatile)
23824  return true;
23825 
23826  // Be conservative about atomics for the moment
23827  // TODO: This is way overconservative for unordered atomics (see D66309)
23828  if (MUC0.IsAtomic && MUC1.IsAtomic)
23829  return true;
23830 
23831  if (MUC0.MMO && MUC1.MMO) {
23832  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23833  (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23834  return false;
23835  }
23836 
23837  // Try to prove that there is aliasing, or that there is no aliasing. Either
23838  // way, we can return now. If nothing can be proved, proceed with more tests.
23839  bool IsAlias;
23840  if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23841  DAG, IsAlias))
23842  return IsAlias;
23843 
23844  // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23845  // either are not known.
23846  if (!MUC0.MMO || !MUC1.MMO)
23847  return true;
23848 
23849  // If one operation reads from invariant memory, and the other may store, they
23850  // cannot alias. These should really be checking the equivalent of mayWrite,
23851  // but it only matters for memory nodes other than load /store.
23852  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23853  (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23854  return false;
23855 
23856  // If we know required SrcValue1 and SrcValue2 have relatively large
23857  // alignment compared to the size and offset of the access, we may be able
23858  // to prove they do not alias. This check is conservative for now to catch
23859  // cases created by splitting vector types, it only works when the offsets are
23860  // multiples of the size of the data.
23861  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23862  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23863  Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23864  Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23865  auto &Size0 = MUC0.NumBytes;
23866  auto &Size1 = MUC1.NumBytes;
23867  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23868  Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23869  OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23870  SrcValOffset1 % *Size1 == 0) {
23871  int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23872  int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23873 
23874  // There is no overlap between these relatively aligned accesses of
23875  // similar size. Return no alias.
23876  if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23877  return false;
23878  }
23879 
23882  : DAG.getSubtarget().useAA();
23883 #ifndef NDEBUG
23884  if (CombinerAAOnlyFunc.getNumOccurrences() &&
23886  UseAA = false;
23887 #endif
23888 
23889  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23890  Size0.hasValue() && Size1.hasValue()) {
23891  // Use alias analysis information.
23892  int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23893  int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23894  int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23895  if (AA->isNoAlias(
23896  MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23897  UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23898  MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23899  UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23900  return false;
23901  }
23902 
23903  // Otherwise we have to assume they alias.
23904  return true;
23905 }
23906 
23907 /// Walk up chain skipping non-aliasing memory nodes,
23908 /// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;    // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.

  // Get alias information for node.
  // TODO: relax aliasing for unordered atomics (see D66309)
  const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Attempt to improve chain by a single step: on success C is rewritten to
  // the next chain to consider (or null for EntryToken); on failure C is an
  // alias that must be kept.
  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      // TODO: Relax aliasing for unordered atomics (see D66309)
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      cast<LSBaseSDNode>(C.getNode())->isSimple();
      // Two simple loads never conflict, so a load/load pair can always be
      // skipped past; otherwise we must prove non-aliasing.
      if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!mayAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      // Unknown chain producers are treated conservatively as aliases.
      return false;
    }
  };

  // Look at each chain and determine if it is an alias. If so, add it to the
  // aliases list. If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Too deep: give up and conservatively keep the original chain.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up. Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }
    // Everything else
    if (ImproveChain(Chain)) {
      // Updated Chain Found, Consider new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      ++Depth;
      continue;
    }
    // No Improved Chain Possible, treat as Alias.
    Aliases.push_back(Chain);
  }
}
24013 
24014 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
24015 /// (aliasing node.)
24016 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
24017  if (OptLevel == CodeGenOpt::None)
24018  return OldChain;
24019 
24020  // Ops for replacing token factor.
24021  SmallVector<SDValue, 8> Aliases;
24022 
24023  // Accumulate all the aliases to this node.
24024  GatherAllAliases(N, OldChain, Aliases);
24025 
24026  // If no operands then chain to entry token.
24027  if (Aliases.size() == 0)
24028  return DAG.getEntryNode();
24029 
24030  // If a single operand then chain to it. We don't need to revisit it.
24031  if (Aliases.size() == 1)
24032  return Aliases[0];
24033 
24034  // Construct a custom tailored token factor.
24035  return DAG.getTokenFactor(SDLoc(N), Aliases);
24036 }
24037 
namespace {
// Empty payload type for IntervalMap below; only interval coverage matters.
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
24044 
24045 // This function tries to collect a bunch of potentially interesting
24046 // nodes to improve the chains of, all at once. This might seem
24047 // redundant, as this function gets called when visiting every store
24048 // node, so why not let the work be done on each store as it's visited?
24049 //
24050 // I believe this is mainly important because mergeConsecutiveStores
24051 // is unable to deal with merging stores of different sizes, so unless
24052 // we improve the chains of all the potential candidates up-front
24053 // before running mergeConsecutiveStores, it might only see some of
24054 // the nodes that will eventually be candidates, and then not be able
24055 // to go from a partially-merged state to the desired final
24056 // fully-merged state.
24057 
24058 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
24059  SmallVector<StoreSDNode *, 8> ChainedStores;
24060  StoreSDNode *STChain = St;
24061  // Intervals records which offsets from BaseIndex have been covered. In
24062  // the common case, every store writes to the immediately previous address
24063  // space and thus merged with the previous interval at insertion time.
24064 
24065  using IMap =
24068  IMap Intervals(A);
24069 
24070  // This holds the base pointer, index, and the offset in bytes from the base
24071  // pointer.
24073 
24074  // We must have a base and an offset.
24075  if (!BasePtr.getBase().getNode())
24076  return false;
24077 
24078  // Do not handle stores to undef base pointers.
24079  if (BasePtr.getBase().isUndef())
24080  return false;
24081 
24082  // Do not handle stores to opaque types
24083  if (St->getMemoryVT().isZeroSized())
24084  return false;
24085 
24086  // BaseIndexOffset assumes that offsets are fixed-size, which
24087  // is not valid for scalable vectors where the offsets are
24088  // scaled by `vscale`, so bail out early.
24089  if (St->getMemoryVT().isScalableVector())
24090  return false;
24091 
24092  // Add ST's interval.
24093  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
24094 
24095  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
24096  if (Chain->getMemoryVT().isScalableVector())
24097  return false;
24098 
24099  // If the chain has more than one use, then we can't reorder the mem ops.
24100  if (!SDValue(Chain, 0)->hasOneUse())
24101  break;
24102  // TODO: Relax for unordered atomics (see D66309)
24103  if (!Chain->isSimple() || Chain->isIndexed())
24104  break;
24105 
24106  // Find the base pointer and offset for this memory node.
24107  const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
24108  // Check that the base pointer is the same as the original one.
24109  int64_t Offset;
24110  if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
24111  break;
24112  int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
24113  // Make sure we don't overlap with other intervals by checking the ones to
24114  // the left or right before inserting.
24115  auto I = Intervals.find(Offset);
24116  // If there's a next interval, we should end before it.
24117  if (I != Intervals.end() && I.start() < (Offset + Length))
24118  break;
24119  // If there's a previous interval, we should start after it.
24120  if (I != Intervals.begin() && (--I).stop() <= Offset)
24121  break;
24122  Intervals.insert(Offset, Offset + Length, Unit);
24123 
24124  ChainedStores.push_back(Chain);
24125  STChain = Chain;
24126  }
24127 
24128  // If we didn't find a chained store, exit.
24129  if (ChainedStores.size() == 0)
24130  return false;
24131 
24132  // Improve all chained stores (St and ChainedStores members) starting from
24133  // where the store chain ended and return single TokenFactor.
24134  SDValue NewChain = STChain->getChain();
24136  for (unsigned I = ChainedStores.size(); I;) {
24137  StoreSDNode *S = ChainedStores[--I];
24138  SDValue BetterChain = FindBetterChain(S, NewChain);
24139  S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
24140  S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24141  TFOps.push_back(SDValue(S, 0));
24142  ChainedStores[I] = S;
24143  }
24144 
24145  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24146  SDValue BetterChain = FindBetterChain(St, NewChain);
24147  SDValue NewST;
24148  if (St->isTruncatingStore())
24149  NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24150  St->getBasePtr(), St->getMemoryVT(),
24151  St->getMemOperand());
24152  else
24153  NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24154  St->getBasePtr(), St->getMemOperand());
24155 
24156  TFOps.push_back(NewST);
24157 
24158  // If we improved every element of TFOps, then we've lost the dependence on
24159  // NewChain to successors of St and we need to add it back to TFOps. Do so at
24160  // the beginning to keep relative order consistent with FindBetterChains.
24161  auto hasImprovedChain = [&](SDValue ST) -> bool {
24162  return ST->getOperand(0) != NewChain;
24163  };
24164  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
24165  if (AddNewChain)
24166  TFOps.insert(TFOps.begin(), NewChain);
24167 
24168  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
24169  CombineTo(St, TF);
24170 
24171  // Add TF and its operands to the worklist.
24172  AddToWorklist(TF.getNode());
24173  for (const SDValue &Op : TF->ops())
24174  AddToWorklist(Op.getNode());
24175  AddToWorklist(STChain);
24176  return true;
24177 }
24178 
24179 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24180  if (OptLevel == CodeGenOpt::None)
24181  return false;
24182 
24184 
24185  // We must have a base and an offset.
24186  if (!BasePtr.getBase().getNode())
24187  return false;
24188 
24189  // Do not handle stores to undef base pointers.
24190  if (BasePtr.getBase().isUndef())
24191  return false;
24192 
24193  // Directly improve a chain of disjoint stores starting at St.
24194  if (parallelizeChainedStores(St))
24195  return true;
24196 
24197  // Improve St's Chain..
24198  SDValue BetterChain = FindBetterChain(St, St->getChain());
24199  if (St->getChain() != BetterChain) {
24200  replaceStoreChain(St, BetterChain);
24201  return true;
24202  }
24203  return false;
24204 }
24205 
24206 /// This is the entry point for the file.
24208  CodeGenOpt::Level OptLevel) {
24209  /// This is the main entry point to this class.
24210  DAGCombiner(*this, AA, OptLevel).Run(Level);
24211 }
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static uint64_t * getMemory(unsigned numWords)
A utility function for allocating memory and checking for allocation failure.
Definition: APInt.cpp:43
This file implements a class to represent arbitrary precision integral constant values and operations...
@ Scaled
This file contains the simple types necessary to represent the attributes associated with functions a...
SmallVector< MachineOperand, 4 > Cond
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:294
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static Optional< bool > isBigEndian(const ArrayRef< int64_t > ByteOffsets, int64_t FirstOffset)
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static const Optional< ByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, bool Root=false)
Recursively traverses the expression calculating the origin of the requested byte of the given value.
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propapagtion pattern try to break it up to generate somet...
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI)
Return true if 'Use' is a load or a store that uses N as its base pointer and that N may be folded in...
static unsigned bigEndianByteAt(unsigned BW, unsigned i)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG)
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static SDValue stripTruncAndExt(SDValue Value)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index, bool Scaled, SelectionDAG &DAG)
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V)
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue Carry0, SDValue Carry1, SDNode *N)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
Generate Min/Max node.
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static unsigned littleEndianByteAt(unsigned BW, unsigned i)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static ManagedStatic< DebugCounter > DC
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Offset
uint64_t Addr
uint32_t Index
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1202
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
iv Induction Variable Users
Definition: IVUsers.cpp:52
This file implements a coalescing interval map for small objects.
static void removeFromWorklist(Instruction *I, std::vector< Instruction * > &Worklist)
Remove all instances of I from the worklist vector specified.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
print Print MemDeps of function
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T
#define T1
uint64_t CallInst * C
This file provides None, an enumerator for use in implicit constructors of various (usually templated...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
This file provides Optional, a template class modeled in the spirit of OCaml's 'opt' variant.
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
ManagedStatic< detail::RecordContext > Context
Definition: Record.cpp:94
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This file describes how to lower LLVM code to machine code.
vector combine
static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, MachineDominatorTree &MDT, LiveIntervals &LIS)
static constexpr int Concat[]
Value * RHS
Value * LHS
BinaryOperator * Mul
const fltSemantics & getSemantics() const
Definition: APFloat.h:1222
bool isNegative() const
Definition: APFloat.h:1214
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1197
bool isNaN() const
Definition: APFloat.h:1212
APInt bitcastToAPInt() const
Definition: APFloat.h:1129
bool isLargest() const
Definition: APFloat.h:1228
bool isInfinity() const
Definition: APFloat.h:1211
Class for arbitrary precision integers.
Definition: APInt.h:75
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1961
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1748
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:434
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:950
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:408
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1467
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:968
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1436
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:881
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1281
APInt abs() const
Get the absolute value.
Definition: APInt.h:1682
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:347
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1115
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:241
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:447
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1656
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1412
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1044
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:312
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1182
int32_t exactLogBase2() const
Definition: APInt.h:1670
APInt sextOrSelf(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:996
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:595
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1539
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1500
unsigned logBase2() const
Definition: APInt.h:1648
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:456
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:452
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1950
bool isMask(unsigned numBits) const
Definition: APInt.h:469
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:926
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:837
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1190
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:425
APInt zextOrSelf(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:990
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:289
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:279
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:444
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1555
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:371
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:269
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:222
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1479
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:822
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:815
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:638
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ISD::CondCode get() const
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
const APFloat & getValueAPF() const
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX)
uint64_t getZExtValue() const
int64_t getSExtValue() const
const APInt & getAPIntValue() const
const ConstantInt * getConstantIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:244
bool isBigEndian() const
Definition: DataLayout.h:245
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:506
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:838
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:151
bool erase(const KeyT &Val)
Definition: DenseMap.h:303
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:208
bool isScalar() const
Counting predicates.
Definition: TypeSize.h:395
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:658
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:316
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:624
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:305
LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:360
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
This class is used to represent ISD::LOAD nodes.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
This is a base class used to represent MGATHER and MSCATTER nodes.
const SDValue & getScale() const
void setIndexType(ISD::MemIndexType IndexType)
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getMask() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
const SDValue & getValue() const
const SDValue & getMask() const
const SDValue & getChain() const
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
const MachinePointerInfo & getPointerInfo() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
const SDValue & getBasePtr() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
unsigned getAlignment() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
static uint64_t getSizeOrUnknown(const TypeSize &T)
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:283
constexpr bool hasValue() const
Definition: Optional.h:289
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
const SDValue & getOperand(unsigned Num) const
ArrayRef< SDUse > ops() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< use_iterator > uses()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool isUndef() const
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOpt::Level OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:329
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
bool isKnownNeverZero(SDValue Op) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
Definition: SelectionDAG.h:956
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, unsigned Num)
Like ReplaceAllUsesOfValueWith, but for multiple values at once.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:522
bool shouldOptForSize() const
OverflowKind computeOverflowKind(SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can overflow.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:442
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits)
See if the specified operand can be simplified with the knowledge that only the bits specified by Dem...
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:622
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isKnownToBeAPowerOfTwo(SDValue Val) const
Test if the given value is known to have exactly one bit set.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:824
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:443
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:505
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOpt::Level OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:807
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:513
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:444
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:441
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:40
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
LLVM_NODISCARD T pop_back_val()
Definition: SetVector.h:232
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
ArrayRef< int > getMask() const
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:93
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:308
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:73
size_t size() const
Definition: SmallVector.h:70
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:554
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:688
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
void reserve(size_type N)
Definition: SmallVector.h:644
iterator erase(const_iterator CI)
Definition: SmallVector.h:724
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:792
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
void resize(size_type N)
Definition: SmallVector.h:619
void push_back(const T &Elt)
Definition: SmallVector.h:400
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:268
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1183
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isMulAddWithConstProfitable(const SDValue &AddNode, const SDValue &ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool hasBitPreservingFPLogic(EVT VT) const
Return true if it is safe to transform an integer-domain bitwise operation into the equivalent floati...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOpt::Level OptLevel) const
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
virtual bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
virtual bool isNarrowingProfitable(EVT, EVT) const
Return true if it's profitable to narrow operations of type VT1 to VT2.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount though its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
ScalarTy getFixedSize() const
Definition: TypeSize.h:425
static TypeSize Fixed(ScalarTy MinVal)
Definition: TypeSize.h:422
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
int getNumOccurrences() const
Definition: CommandLine.h:402
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2138
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2133
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:675
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:462
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1243
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:535
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:666
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:354
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:470
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:910
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:925
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:360
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:732
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:739
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:519
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1232
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:640
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:769
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1242
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:463
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:862
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:852
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1253
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:583
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1229
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:674
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1233
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:290
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:710
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:870
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:967
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:314
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:609
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:336
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1244
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:590
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:310
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1237
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:614
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:563
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:549
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:909
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:729
@ TargetConstantFP
Definition: ISDOpcodes.h:159
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:805
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:694
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1206
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:902
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:346
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:318
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:758
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:626
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1181
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:837
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:688
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1075
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1245
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:915
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:785
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:260
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1230
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:387
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:818
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:780
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:804
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:960
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:327
@ AssertZext
Definition: ISDOpcodes.h:62
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:300
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant BUI...
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
Optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
@ SIGNED_UNSCALED
Definition: ISDOpcodes.h:1333
@ UNSIGNED_UNSCALED
Definition: ISDOpcodes.h:1335
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1334
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1403
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1319
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1370
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1350
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
@ VecLoad
Definition: NVPTX.h:85
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
constexpr double e
Definition: MathExtras.h:57
DiagnosticInfoOptimizationBase::Argument NV
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
/file This file defines the SmallVector class.
Definition: AllocatorList.h:22
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:960
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:266
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:474
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:683
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:609
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:339
void stable_sort(R &&Range)
Definition: STLExtras.h:1719
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1604
STATISTIC(NumFunctions, "Total number of functions")
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1584
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1986
bool isConstantOrConstantVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a build vector of constant integers.
Definition: Utils.cpp:1107
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:455
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:138
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1988
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:338
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:496
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:359
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:602
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:702
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
unsigned M1(unsigned Val)
Definition: VE.h:371
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1591
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
const NoneType None
Definition: None.h:24
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:340
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:225
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:156
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1598
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1529
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:525
bool isNullOrNullSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
bool is_splat(R &&Range)
Wrapper function around std::equal to detect if all elements in a container are same.
Definition: STLExtras.h:1751
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ FAdd
Sum of floats.
unsigned M0(unsigned Val)
Definition: VE.h:370
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
std::enable_if_t<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > cast(const Y &Val)
Definition: Casting.h:254
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1642
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1674
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1649
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:207
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:337
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:651
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:189
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:211
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:205
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:363
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:273
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:140
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:323
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:216
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:332
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:432
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:374
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:349
bool isFixedLengthVector() const
Definition: ValueTypes.h:165
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:265
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:229
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:221
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:161
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:135
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:150
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:125
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:281
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:145
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:35
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:226
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
Definition: KnownBits.h:298
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:136
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:78
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const
bool hasNoNaNs() const
bool hasNoSignedZeros() const
bool hasApproximateFuncs() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:288
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null, there is no BaseGV.
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.